escape.go (3060B)
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Copied and modified from Go 1.14 stdlib's encoding/xml 6 7 package xml 8 9 import ( 10 "unicode/utf8" 11 ) 12 13 // Copied from Go 1.14 stdlib's encoding/xml 14 var ( 15 escQuot = []byte(""") // shorter than """ 16 escApos = []byte("'") // shorter than "'" 17 escAmp = []byte("&") 18 escLT = []byte("<") 19 escGT = []byte(">") 20 escTab = []byte("	") 21 escNL = []byte("
") 22 escCR = []byte("
") 23 escFFFD = []byte("\uFFFD") // Unicode replacement character 24 25 // Additional Escapes 26 escNextLine = []byte("…") 27 escLS = []byte("
") 28 ) 29 30 // Decide whether the given rune is in the XML Character Range, per 31 // the Char production of https://www.xml.com/axml/testaxml.htm, 32 // Section 2.2 Characters. 33 func isInCharacterRange(r rune) (inrange bool) { 34 return r == 0x09 || 35 r == 0x0A || 36 r == 0x0D || 37 r >= 0x20 && r <= 0xD7FF || 38 r >= 0xE000 && r <= 0xFFFD || 39 r >= 0x10000 && r <= 0x10FFFF 40 } 41 42 // TODO: When do we need to escape the string? 43 // Based on encoding/xml escapeString from the Go Standard Library. 44 // https://golang.org/src/encoding/xml/xml.go 45 func escapeString(e writer, s string) { 46 var esc []byte 47 last := 0 48 for i := 0; i < len(s); { 49 r, width := utf8.DecodeRuneInString(s[i:]) 50 i += width 51 switch r { 52 case '"': 53 esc = escQuot 54 case '\'': 55 esc = escApos 56 case '&': 57 esc = escAmp 58 case '<': 59 esc = escLT 60 case '>': 61 esc = escGT 62 case '\t': 63 esc = escTab 64 case '\n': 65 esc = escNL 66 case '\r': 67 esc = escCR 68 case '\u0085': 69 // Not escaped by stdlib 70 esc = escNextLine 71 case '\u2028': 72 // Not escaped by stdlib 73 esc = escLS 74 default: 75 if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { 76 esc = escFFFD 77 break 78 } 79 continue 80 } 81 e.WriteString(s[last : i-width]) 82 e.Write(esc) 83 last = i 84 } 85 e.WriteString(s[last:]) 86 } 87 88 // escapeText writes to w the properly escaped XML equivalent 89 // of the plain text data s. If escapeNewline is true, newline 90 // characters will be escaped. 91 // 92 // Based on encoding/xml escapeText from the Go Standard Library. 93 // https://golang.org/src/encoding/xml/xml.go 94 func escapeText(e writer, s []byte) { 95 var esc []byte 96 last := 0 97 for i := 0; i < len(s); { 98 r, width := utf8.DecodeRune(s[i:]) 99 i += width 100 switch r { 101 case '"': 102 esc = escQuot 103 case '\'': 104 esc = escApos 105 case '&': 106 esc = escAmp 107 case '<': 108 esc = escLT 109 case '>': 110 esc = escGT 111 case '\t': 112 esc = escTab 113 case '\n': 114 // This always escapes newline, which is different than stdlib's optional 115 // escape of new line. 116 esc = escNL 117 case '\r': 118 esc = escCR 119 case '\u0085': 120 // Not escaped by stdlib 121 esc = escNextLine 122 case '\u2028': 123 // Not escaped by stdlib 124 esc = escLS 125 default: 126 if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { 127 esc = escFFFD 128 break 129 } 130 continue 131 } 132 e.Write(s[last : i-width]) 133 e.Write(esc) 134 last = i 135 } 136 e.Write(s[last:]) 137 }