werc-1.5.0-tweaks/bin/contrib/urlencode.awk

# Taken from http://www.shelldorado.com/scripts/cmds/urlencode
##########################################################################
# Title      :  urlencode - encode URL data
# Author     :  Heiner Steven (heiner.steven@odn.de)
# Date       :  2000-03-15
# Requires   :  awk
# Categories :  File Conversion, WWW, CGI
# SCCS-Id.   :  @(#) urlencode  1.4 06/10/29
##########################################################################
# Description
#   Encode data according to
#       RFC 1738: "Uniform Resource Locators (URL)" and
#       RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
#
#   This encoding is used i.e. for the MIME type
#   "application/x-www-form-urlencoded"
#
# Notes
#    o  The default behaviour is not to encode the line endings. This
#   may not be what was intended, because the result will be
#   multiple lines of output (which cannot be used in an URL or a
#   HTTP "POST" request). If the desired output should be one
#   line, use the "-l" option.
#
#    o  The "-l" option assumes, that the end-of-line is denoted by
#   the character LF (ASCII 10). This is not true for Windows or
#   Mac systems, where the end of a line is denoted by the two
#   characters CR LF (ASCII 13 10).
#   We use this for symmetry; data processed in the following way:
#       cat | urlencode -l | urldecode -l
#   should (and will) result in the original data
#
#    o  Large lines (or binary files) will break many AWK
#       implementations. If you get the message
#       awk: record `...' too long
#        record number xxx
#   consider using GNU AWK (gawk).
#
#    o  urlencode will always terminate it's output with an EOL
#       character
#
# Thanks to Stefan Brozinski for pointing out a bug related to non-standard
# locales.
#
# See also
#   urldecode
##########################################################################

PN=`basename "$0"`          # Program name
VER='1.4'

: ${AWK=awk}

Usage () {
    echo >&2 "$PN - encode URL data, $VER
usage: $PN [-l] [file ...]
    -l:  encode line endings (result will be one line of output)

The default is to encode each input line on its own."
    exit 1
}

Msg () {
    for MsgLine
    do echo "$PN: $MsgLine" >&2
    done
}

Fatal () { Msg "$@"; exit 1; }

set -- `getopt hl "$@" 2>/dev/null` || Usage
[ $# -lt 1 ] && Usage           # "getopt" detected an error

EncodeEOL=no
while [ $# -gt 0 ]
do
    case "$1" in
        -l) EncodeEOL=yes;;
    --) shift; break;;
    -h) Usage;;
    -*) Usage;;
    *)  break;;         # First file name
    esac
    shift
done

LANG=C  export LANG
$AWK '
    BEGIN {
    # We assume an awk implementation that is just plain dumb.
    # We will convert an character to its ASCII value with the
    # table ord[], and produce two-digit hexadecimal output
    # without the printf("%02X") feature.

    EOL = "%0A"     # "end of line" string (encoded)
    split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
    hextab [0] = 0
    for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
    if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
    }
    {
    encoded = ""
    for ( i=1; i<=length ($0); ++i ) {
        c = substr ($0, i, 1)
        if ( c ~ /[a-zA-Z0-9.-]/ ) {
        encoded = encoded c     # safe character
        } else if ( c == " " ) {
        encoded = encoded "+"   # special handling
        } else {
        # unsafe character, encode it as a two-digit hex-number
        lo = ord [c] % 16
        hi = int (ord [c] / 16);
        encoded = encoded "%" hextab [hi] hextab [lo]
        }
    }
    if ( EncodeEOL ) {
        printf ("%s", encoded EOL)
    } else {
        print encoded
    }
    }
    END {
        #if ( EncodeEOL ) print ""
    }
' "$@"
feat: start keeping track of atomic changes on top of werc-1.5.0 2022-03-10 07:21:54 +00:00			`# Taken from http://www.shelldorado.com/scripts/cmds/urlencode`
			`##########################################################################`
			`# Title : urlencode - encode URL data`
			`# Author : Heiner Steven (heiner.steven@odn.de)`
			`# Date : 2000-03-15`
			`# Requires : awk`
			`# Categories : File Conversion, WWW, CGI`
			`# SCCS-Id. : @(#) urlencode 1.4 06/10/29`
			`##########################################################################`
			`# Description`
			`# Encode data according to`
			`# RFC 1738: "Uniform Resource Locators (URL)" and`
			`# RFC 1866: "Hypertext Markup Language - 2.0" (HTML)`
			`#`
			`# This encoding is used i.e. for the MIME type`
			`# "application/x-www-form-urlencoded"`
			`#`
			`# Notes`
			`# o The default behaviour is not to encode the line endings. This`
			`# may not be what was intended, because the result will be`
			`# multiple lines of output (which cannot be used in an URL or a`
			`# HTTP "POST" request). If the desired output should be one`
			`# line, use the "-l" option.`
			`#`
			`# o The "-l" option assumes, that the end-of-line is denoted by`
			`# the character LF (ASCII 10). This is not true for Windows or`
			`# Mac systems, where the end of a line is denoted by the two`
			`# characters CR LF (ASCII 13 10).`
			`# We use this for symmetry; data processed in the following way:`
			`# cat \| urlencode -l \| urldecode -l`
			`# should (and will) result in the original data`
			`#`
			`# o Large lines (or binary files) will break many AWK`
			`# implementations. If you get the message`
			# awk: record `...' too long
			`# record number xxx`
			`# consider using GNU AWK (gawk).`
			`#`
			`# o urlencode will always terminate it's output with an EOL`
			`# character`
			`#`
			`# Thanks to Stefan Brozinski for pointing out a bug related to non-standard`
			`# locales.`
			`#`
			`# See also`
			`# urldecode`
			`##########################################################################`

			PN=`basename "$0"` # Program name
			`VER='1.4'`

			`: ${AWK=awk}`

			`Usage () {`
			`echo >&2 "$PN - encode URL data, $VER`
			`usage: $PN [-l] [file ...]`
			`-l: encode line endings (result will be one line of output)`

			`The default is to encode each input line on its own."`
			`exit 1`
			`}`

			`Msg () {`
			`for MsgLine`
			`do echo "$PN: $MsgLine" >&2`
			`done`
			`}`

			`Fatal () { Msg "$@"; exit 1; }`

			set -- `getopt hl "$@" 2>/dev/null` \|\| Usage
			`[ $# -lt 1 ] && Usage # "getopt" detected an error`

			`EncodeEOL=no`
			`while [ $# -gt 0 ]`
			`do`
			`case "$1" in`
			`-l) EncodeEOL=yes;;`
			`--) shift; break;;`
			`-h) Usage;;`
			`-*) Usage;;`
			`*) break;; # First file name`
			`esac`
			`shift`
			`done`

			`LANG=C export LANG`
			`$AWK '`
			`BEGIN {`
			`# We assume an awk implementation that is just plain dumb.`
			`# We will convert an character to its ASCII value with the`
			`# table ord[], and produce two-digit hexadecimal output`
			`# without the printf("%02X") feature.`

			`EOL = "%0A" # "end of line" string (encoded)`
			`split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")`
			`hextab [0] = 0`
			`for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0`
			`if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0`
			`}`
			`{`
			`encoded = ""`
			`for ( i=1; i<=length ($0); ++i ) {`
			`c = substr ($0, i, 1)`
			`if ( c ~ /[a-zA-Z0-9.-]/ ) {`
			`encoded = encoded c # safe character`
			`} else if ( c == " " ) {`
			`encoded = encoded "+" # special handling`
			`} else {`
			`# unsafe character, encode it as a two-digit hex-number`
			`lo = ord [c] % 16`
			`hi = int (ord [c] / 16);`
			`encoded = encoded "%" hextab [hi] hextab [lo]`
			`}`
			`}`
			`if ( EncodeEOL ) {`
			`printf ("%s", encoded EOL)`
			`} else {`
			`print encoded`
			`}`
			`}`
			`END {`
			`#if ( EncodeEOL ) print ""`
			`}`
			`' "$@"`