src

Go monorepo.
git clone git://code.dwrz.net/src
Log | Files | Refs

escape.go (3060B)


      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Copied and modified from Go 1.14 stdlib's encoding/xml
      6 
      7 package xml
      8 
      9 import (
     10 	"unicode/utf8"
     11 )
     12 
     13 // Copied from Go 1.14 stdlib's encoding/xml
     14 var (
     15 	escQuot = []byte(""") // shorter than """
     16 	escApos = []byte("'") // shorter than "'"
     17 	escAmp  = []byte("&")
     18 	escLT   = []byte("<")
     19 	escGT   = []byte(">")
     20 	escTab  = []byte("	")
     21 	escNL   = []byte("
")
     22 	escCR   = []byte("
")
     23 	escFFFD = []byte("\uFFFD") // Unicode replacement character
     24 
     25 	// Additional Escapes
     26 	escNextLine = []byte("…")
     27 	escLS       = []byte("
")
     28 )
     29 
     30 // Decide whether the given rune is in the XML Character Range, per
     31 // the Char production of https://www.xml.com/axml/testaxml.htm,
     32 // Section 2.2 Characters.
     33 func isInCharacterRange(r rune) (inrange bool) {
     34 	return r == 0x09 ||
     35 		r == 0x0A ||
     36 		r == 0x0D ||
     37 		r >= 0x20 && r <= 0xD7FF ||
     38 		r >= 0xE000 && r <= 0xFFFD ||
     39 		r >= 0x10000 && r <= 0x10FFFF
     40 }
     41 
     42 // TODO: When do we need to escape the string?
     43 // Based on encoding/xml escapeString from the Go Standard Library.
     44 // https://golang.org/src/encoding/xml/xml.go
     45 func escapeString(e writer, s string) {
     46 	var esc []byte
     47 	last := 0
     48 	for i := 0; i < len(s); {
     49 		r, width := utf8.DecodeRuneInString(s[i:])
     50 		i += width
     51 		switch r {
     52 		case '"':
     53 			esc = escQuot
     54 		case '\'':
     55 			esc = escApos
     56 		case '&':
     57 			esc = escAmp
     58 		case '<':
     59 			esc = escLT
     60 		case '>':
     61 			esc = escGT
     62 		case '\t':
     63 			esc = escTab
     64 		case '\n':
     65 			esc = escNL
     66 		case '\r':
     67 			esc = escCR
     68 		case '\u0085':
     69 			// Not escaped by stdlib
     70 			esc = escNextLine
     71 		case '\u2028':
     72 			// Not escaped by stdlib
     73 			esc = escLS
     74 		default:
     75 			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
     76 				esc = escFFFD
     77 				break
     78 			}
     79 			continue
     80 		}
     81 		e.WriteString(s[last : i-width])
     82 		e.Write(esc)
     83 		last = i
     84 	}
     85 	e.WriteString(s[last:])
     86 }
     87 
     88 // escapeText writes to w the properly escaped XML equivalent
     89 // of the plain text data s. If escapeNewline is true, newline
     90 // characters will be escaped.
     91 //
     92 // Based on encoding/xml escapeText from the Go Standard Library.
     93 // https://golang.org/src/encoding/xml/xml.go
     94 func escapeText(e writer, s []byte) {
     95 	var esc []byte
     96 	last := 0
     97 	for i := 0; i < len(s); {
     98 		r, width := utf8.DecodeRune(s[i:])
     99 		i += width
    100 		switch r {
    101 		case '"':
    102 			esc = escQuot
    103 		case '\'':
    104 			esc = escApos
    105 		case '&':
    106 			esc = escAmp
    107 		case '<':
    108 			esc = escLT
    109 		case '>':
    110 			esc = escGT
    111 		case '\t':
    112 			esc = escTab
    113 		case '\n':
    114 			// This always escapes newline, which is different than stdlib's optional
    115 			// escape of new line.
    116 			esc = escNL
    117 		case '\r':
    118 			esc = escCR
    119 		case '\u0085':
    120 			// Not escaped by stdlib
    121 			esc = escNextLine
    122 		case '\u2028':
    123 			// Not escaped by stdlib
    124 			esc = escLS
    125 		default:
    126 			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
    127 				esc = escFFFD
    128 				break
    129 			}
    130 			continue
    131 		}
    132 		e.Write(s[last : i-width])
    133 		e.Write(esc)
    134 		last = i
    135 	}
    136 	e.Write(s[last:])
    137 }