md2html

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 645af836a1bfa9dbd641c9bf18dce5edca09f9ea
Author: emmett1 <emmett1.2miligrams@protonmail.com>
Date:   Mon,  8 Sep 2025 00:26:45 +0800

update

Diffstat:
ALICENSE | 9+++++++++
AREADME.md | 4++++
Amd2html.sh | 299+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 312 insertions(+), 0 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2025 emmett1 + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#!/bin/sh
# md2html.sh - Markdown to HTML converter
#
# (The README.md added in the same commit reads: "# md2html" /
#  "markdown to html converter".)
#
# usage:
#   ./md2html.sh index.md > index.html
#   cat index.md | ./md2html.sh > index.html
#
# Supported syntax: ATX (#) and Setext (=== / ---) headings, paragraphs,
# fenced code blocks (```), pipe tables, horizontal rules, blockquotes,
# unordered and ordered lists, and inline images, links, bold, italic,
# strikethrough and code spans.
#
# Written for POSIX sh: no arrays, no `local`, no [[ ]]. All converter state
# lives in the global variables initialised by md2html_convert.

# escape_html TEXT
# Print TEXT with &, <, > and " replaced by HTML entities (no trailing newline
# is added by printf). The double quote is escaped because inline_format
# places user text inside alt="", src="" and href="" attributes.
escape_html() {
  printf '%s' "$1" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g; s/"/\&quot;/g'
}

# inline_format TEXT
# Escape TEXT and convert inline Markdown to HTML. Rule order matters: images
# before links (an image is a link prefixed with '!'), and the two-character
# markers (**, __) before the one-character ones (*, _).
inline_format() {
  t=$(escape_html "$1")
  printf '%s' "$t" | sed \
    -e 's/!\[\([^]]*\)\](\([^)]*\))/<img alt="\1" src="\2">/g' \
    -e 's/\[\([^]]*\)\](\([^)]*\))/<a href="\2">\1<\/a>/g' \
    -e 's/\*\*\([^*][^*]*\)\*\*/<strong>\1<\/strong>/g' \
    -e 's/__\([^_][^_]*\)__/<strong>\1<\/strong>/g' \
    -e 's/~~\([^~][^~]*\)~~/<del>\1<\/del>/g' \
    -e 's/\*\([^*][^*]*\)\*/<em>\1<\/em>/g' \
    -e 's/_\([^_][^_]*\)_/<em>\1<\/em>/g' \
    -e 's/`\([^`]*\)`/<code>\1<\/code>/g'
}

# Close whichever list (<ul> / <ol>) is currently open.
flush_lists() {
  if [ "$in_ul" -eq 1 ]; then
    printf '%s\n' "</ul>"
    in_ul=0
  fi
  if [ "$in_ol" -eq 1 ]; then
    printf '%s\n' "</ol>"
    in_ol=0
  fi
  return 0
}

# Close the current <blockquote>, if one is open.
close_blockquote() {
  if [ "$in_blockquote" -eq 1 ]; then
    printf '%s\n' "</blockquote>"
    in_blockquote=0
  fi
  return 0
}

# Emit the pending text line (held back for the Setext check) as a paragraph.
flush_paragraph() {
  if [ -n "$prev_line" ]; then
    printf '%s\n' "<p>$(inline_format "$prev_line")</p>"
    prev_line=''
  fi
  return 0
}

# is_table_separator LINE
# Succeed when LINE is a table delimiter row: at least one cell, and every
# non-empty cell looks like ---, :---, ---: or :---:. Empty cells (such as the
# ones produced by leading/trailing pipes) are skipped with `continue`; an
# empty or whitespace-only line is NOT a separator.
is_table_separator() {
  printf '%s\n' "$1" | awk -F'|' '
    {
      cells = 0
      for (i = 1; i <= NF; i++) {
        s = $i
        sub(/^[ \t]+/, "", s)
        sub(/[ \t]+$/, "", s)
        if (s == "") continue
        if (s !~ /^:?-+:?$/) exit 1
        cells++
      }
      exit (cells > 0 ? 0 : 1)
    }'
}

# render_cells TAG ROW
# Split ROW on '|' and print each trimmed cell wrapped in <TAG>...</TAG> on a
# single line (no trailing newline). One optional leading and one optional
# trailing pipe are dropped first so "| a | b |" yields two cells, not four.
render_cells() {
  printf '%s\n' "$2" | awk -F'|' '
    {
      sub(/^[ \t]*[|]/, "")   # modifies $0, which re-splits the fields
      sub(/[|][ \t]*$/, "")
      for (i = 1; i <= NF; i++) {
        s = $i
        sub(/^[ \t]+/, "", s)
        sub(/[ \t]+$/, "", s)
        print s
      }
    }' |
    while IFS= read -r cell; do
      printf '<%s>%s</%s>' "$1" "$(inline_format "$cell")" "$1"
    done
}

# render_table HEADER
# Print a table whose header row is HEADER. The separator row has already been
# consumed by the caller; body rows are read from stdin until a blank line, a
# line without '|' (pushed back for the main loop) or EOF.
render_table() {
  flush_lists
  close_blockquote

  printf '%s\n' "<table>"
  printf '%s\n' "<thead><tr>"
  render_cells th "$1"
  printf '\n%s\n' "</tr></thead>"
  printf '%s\n' "<tbody>"

  while IFS= read -r row || [ -n "$row" ]; do
    case "$row" in
      '')
        break
        ;;
      *'|'*) ;;
      *)
        push=$row
        have_push=1
        break
        ;;
    esac
    printf '%s\n' "<tr>"
    render_cells td "$row"
    printf '\n%s\n' "</tr>"
  done

  printf '%s\n' "</tbody></table>"
}

# md2html_convert
# Read Markdown from stdin and write HTML to stdout. Always returns 0.
#
# Globals (all reset on entry):
#   in_code, in_ul, in_ol, in_blockquote  - 1 while that block is open
#   push, have_push                        - one-line pushback buffer; the flag
#                                            lets an EMPTY line be pushed back
#   prev_line                              - plain-text line held until the next
#                                            line shows whether it is a Setext
#                                            heading or a paragraph
md2html_convert() {
  in_code=0
  in_ul=0
  in_ol=0
  in_blockquote=0
  push=''
  have_push=0
  prev_line=''

  while :; do
    if [ "$have_push" -eq 1 ]; then
      line=$push
      push=''
      have_push=0
    elif ! IFS= read -r line; then
      # read fails at EOF but still fills $line when the final line has no
      # trailing newline; process that line instead of dropping it.
      [ -n "$line" ] || break
    fi

    # Inside a fenced code block every line is literal (blank lines included)
    # until the closing fence.
    if [ "$in_code" -eq 1 ]; then
      case "$line" in
        '```'*)
          printf '</code></pre>\n'
          in_code=0
          ;;
        *)
          printf '%s\n' "$(escape_html "$line")"
          ;;
      esac
      continue
    fi

    # Blank line: ends the pending paragraph and any blockquote; nothing is
    # printed for the blank line itself.
    if [ -z "$line" ]; then
      flush_paragraph
      close_blockquote
      continue
    fi

    # A pending text line becomes a Setext heading only when the current line
    # consists solely of two or more '=' (h1) or '-' (h2), ignoring
    # surrounding whitespace. Anything else flushes it as a paragraph.
    if [ -n "$prev_line" ]; then
      trimmed=${line#"${line%%[![:space:]]*}"}
      trimmed=${trimmed%"${trimmed##*[![:space:]]}"}
      heading=''
      case "$trimmed" in
        ==*)
          case "$trimmed" in
            *[!=]*) ;;
            *) heading=h1 ;;
          esac
          ;;
        --*)
          case "$trimmed" in
            *[!-]*) ;;
            *) heading=h2 ;;
          esac
          ;;
      esac
      if [ -n "$heading" ]; then
        printf '%s\n' "<$heading>$(inline_format "$prev_line")</$heading>"
        prev_line=''
        continue
      fi
      flush_paragraph
    fi

    # Any line that is not itself a quote line ends an open blockquote.
    case "$line" in
      '>'*) ;;
      *) close_blockquote ;;
    esac

    # Opening code fence (no newline after the tags so the first code line
    # follows <code> directly).
    case "$line" in
      '```'*)
        flush_lists
        close_blockquote
        printf '<pre><code>'
        in_code=1
        continue
        ;;
    esac

    # Table: a line containing '|' followed by a separator row. The lookahead
    # line is pushed back (even when empty) if it is not a separator.
    case "$line" in
      *'|'*)
        sep=''
        if IFS= read -r sep || [ -n "$sep" ]; then
          if is_table_separator "$sep"; then
            render_table "$line"
            continue
          fi
          push=$sep
          have_push=1
        fi
        ;;
    esac

    # Horizontal rule
    case "$line" in
      '---' | '***' | '___')
        flush_lists
        close_blockquote
        printf '%s\n' "<hr>"
        continue
        ;;
    esac

    # ATX heading: 1-6 '#' followed by a space.
    case "$line" in
      '#'*)
        hashes=${line%%[!#]*}
        rest=${line#"$hashes"}
        case "${#hashes}:$rest" in
          [1-6]:' '*)
            flush_lists
            level=${#hashes}
            # ${rest#?} drops the single space that follows the hashes.
            printf '%s\n' "<h$level>$(inline_format "${rest#?}")</h$level>"
            continue
            ;;
        esac
        ;;
    esac

    # Blockquote: each '>' line becomes its own <p> inside one <blockquote>.
    case "$line" in
      '>'*)
        flush_lists
        if [ "$in_blockquote" -eq 0 ]; then
          printf '%s\n' "<blockquote>"
          in_blockquote=1
        fi
        content=$(printf '%s' "$line" | sed 's/^> *//')
        printf '%s\n' "<p>$(inline_format "$content")</p>"
        continue
        ;;
    esac

    # Unordered list item: '-', '+' or '*' followed by a space.
    case "$line" in
      [-+*]\ *)
        if [ "$in_ol" -eq 1 ]; then
          printf '%s\n' "</ol>"
          in_ol=0
        fi
        if [ "$in_ul" -eq 0 ]; then
          printf '%s\n' "<ul>"
          in_ul=1
        fi
        printf '%s\n' "<li>$(inline_format "${line#? }")</li>"
        continue
        ;;
    esac

    # Ordered list item: the text before the first '.' must be all digits and
    # the '.' must be followed by a space ("1. item"), so prose such as
    # "2024 was good. Really." stays a paragraph.
    num=${line%%.*}
    case "$num" in
      '' | *[!0-9]*) ;;
      *)
        case "$line" in
          "$num. "*)
            if [ "$in_ul" -eq 1 ]; then
              printf '%s\n' "</ul>"
              in_ul=0
            fi
            if [ "$in_ol" -eq 0 ]; then
              printf '%s\n' "<ol>"
              in_ol=1
            fi
            printf '%s\n' "<li>$(inline_format "${line#"$num. "}")</li>"
            continue
            ;;
        esac
        ;;
    esac

    # Plain text: ends any list, then is held back so the next line can turn
    # it into a Setext heading.
    flush_lists
    prev_line=$line
  done

  # End of input: close whatever is still open.
  flush_paragraph
  flush_lists
  close_blockquote
  if [ "$in_code" -eq 1 ]; then
    printf '</code></pre>\n'
  fi
  return 0
}

# Entry point: read from the file named by $1 when given, otherwise stdin.
# The explicit `|| exit 1` matters for shells (bash outside POSIX mode) that
# do not abort on a failed `exec` redirection.
if [ $# -ge 1 ]; then
  exec <"$1" || exit 1
fi

md2html_convert