xml_decoder.go (4390B)
1 package xml 2 3 import ( 4 "encoding/xml" 5 "fmt" 6 "strings" 7 ) 8 9 // NodeDecoder is a XML decoder wrapper that is responsible to decoding 10 // a single XML Node element and it's nested member elements. This wrapper decoder 11 // takes in the start element of the top level node being decoded. 12 type NodeDecoder struct { 13 Decoder *xml.Decoder 14 StartEl xml.StartElement 15 } 16 17 // WrapNodeDecoder returns an initialized XMLNodeDecoder 18 func WrapNodeDecoder(decoder *xml.Decoder, startEl xml.StartElement) NodeDecoder { 19 return NodeDecoder{ 20 Decoder: decoder, 21 StartEl: startEl, 22 } 23 } 24 25 // Token on a Node Decoder returns a xml StartElement. It returns a boolean that indicates the 26 // a token is the node decoder's end node token; and an error which indicates any error 27 // that occurred while retrieving the start element 28 func (d NodeDecoder) Token() (t xml.StartElement, done bool, err error) { 29 for { 30 token, e := d.Decoder.Token() 31 if e != nil { 32 return t, done, e 33 } 34 35 // check if we reach end of the node being decoded 36 if el, ok := token.(xml.EndElement); ok { 37 return t, el == d.StartEl.End(), err 38 } 39 40 if t, ok := token.(xml.StartElement); ok { 41 return restoreAttrNamespaces(t), false, err 42 } 43 44 // skip token if it is a comment or preamble or empty space value due to indentation 45 // or if it's a value and is not expected 46 } 47 } 48 49 // restoreAttrNamespaces update XML attributes to restore the short namespaces found within 50 // the raw XML document. 51 func restoreAttrNamespaces(node xml.StartElement) xml.StartElement { 52 if len(node.Attr) == 0 { 53 return node 54 } 55 56 // Generate a mapping of XML namespace values to their short names. 57 ns := map[string]string{} 58 for _, a := range node.Attr { 59 if a.Name.Space == "xmlns" { 60 ns[a.Value] = a.Name.Local 61 break 62 } 63 } 64 65 for i, a := range node.Attr { 66 if a.Name.Space == "xmlns" { 67 continue 68 } 69 // By default, xml.Decoder will fully resolve these namespaces. So if you had <foo xmlns:bar=baz bar:bin=hi/> 70 // then by default the second attribute would have the `Name.Space` resolved to `baz`. But we need it to 71 // continue to resolve as `bar` so we can easily identify it later on. 72 if v, ok := ns[node.Attr[i].Name.Space]; ok { 73 node.Attr[i].Name.Space = v 74 } 75 } 76 return node 77 } 78 79 // GetElement looks for the given tag name at the current level, and returns the element if found, and 80 // skipping over non-matching elements. Returns an error if the node is not found, or if an error occurs while walking 81 // the document. 82 func (d NodeDecoder) GetElement(name string) (t xml.StartElement, err error) { 83 for { 84 token, done, err := d.Token() 85 if err != nil { 86 return t, err 87 } 88 if done { 89 return t, fmt.Errorf("%s node not found", name) 90 } 91 switch { 92 case strings.EqualFold(name, token.Name.Local): 93 return token, nil 94 default: 95 err = d.Decoder.Skip() 96 if err != nil { 97 return t, err 98 } 99 } 100 } 101 } 102 103 // Value provides an abstraction to retrieve char data value within an xml element. 104 // The method will return an error if it encounters a nested xml element instead of char data. 105 // This method should only be used to retrieve simple type or blob shape values as []byte. 106 func (d NodeDecoder) Value() (c []byte, err error) { 107 t, e := d.Decoder.Token() 108 if e != nil { 109 return c, e 110 } 111 112 endElement := d.StartEl.End() 113 114 switch ev := t.(type) { 115 case xml.CharData: 116 c = ev.Copy() 117 case xml.EndElement: // end tag or self-closing 118 if ev == endElement { 119 return []byte{}, err 120 } 121 return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t) 122 default: 123 return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t) 124 } 125 126 t, e = d.Decoder.Token() 127 if e != nil { 128 return c, e 129 } 130 131 if ev, ok := t.(xml.EndElement); ok { 132 if ev == endElement { 133 return c, err 134 } 135 } 136 137 return c, fmt.Errorf("expected end element %v, got %T type %v instead", endElement, t, t) 138 } 139 140 // FetchRootElement takes in a decoder and returns the first start element within the xml body. 141 // This function is useful in fetching the start element of an XML response and ignore the 142 // comments and preamble 143 func FetchRootElement(decoder *xml.Decoder) (startElement xml.StartElement, err error) { 144 for { 145 t, e := decoder.Token() 146 if e != nil { 147 return startElement, e 148 } 149 150 if startElement, ok := t.(xml.StartElement); ok { 151 return startElement, err 152 } 153 } 154 }