code.dwrz.net

Go monorepo.
Log | Files | Refs

xml_decoder.go (4390B)


      1 package xml
      2 
      3 import (
      4 	"encoding/xml"
      5 	"fmt"
      6 	"strings"
      7 )
      8 
      9 // NodeDecoder is a XML decoder wrapper that is responsible to decoding
     10 // a single XML Node element and it's nested member elements. This wrapper decoder
     11 // takes in the start element of the top level node being decoded.
     12 type NodeDecoder struct {
     13 	Decoder *xml.Decoder
     14 	StartEl xml.StartElement
     15 }
     16 
     17 // WrapNodeDecoder returns an initialized XMLNodeDecoder
     18 func WrapNodeDecoder(decoder *xml.Decoder, startEl xml.StartElement) NodeDecoder {
     19 	return NodeDecoder{
     20 		Decoder: decoder,
     21 		StartEl: startEl,
     22 	}
     23 }
     24 
     25 // Token on a Node Decoder returns a xml StartElement. It returns a boolean that indicates the
     26 // a token is the node decoder's end node token; and an error which indicates any error
     27 // that occurred while retrieving the start element
     28 func (d NodeDecoder) Token() (t xml.StartElement, done bool, err error) {
     29 	for {
     30 		token, e := d.Decoder.Token()
     31 		if e != nil {
     32 			return t, done, e
     33 		}
     34 
     35 		// check if we reach end of the node being decoded
     36 		if el, ok := token.(xml.EndElement); ok {
     37 			return t, el == d.StartEl.End(), err
     38 		}
     39 
     40 		if t, ok := token.(xml.StartElement); ok {
     41 			return restoreAttrNamespaces(t), false, err
     42 		}
     43 
     44 		// skip token if it is a comment or preamble or empty space value due to indentation
     45 		// or if it's a value and is not expected
     46 	}
     47 }
     48 
     49 // restoreAttrNamespaces update XML attributes to restore the short namespaces found within
     50 // the raw XML document.
     51 func restoreAttrNamespaces(node xml.StartElement) xml.StartElement {
     52 	if len(node.Attr) == 0 {
     53 		return node
     54 	}
     55 
     56 	// Generate a mapping of XML namespace values to their short names.
     57 	ns := map[string]string{}
     58 	for _, a := range node.Attr {
     59 		if a.Name.Space == "xmlns" {
     60 			ns[a.Value] = a.Name.Local
     61 			break
     62 		}
     63 	}
     64 
     65 	for i, a := range node.Attr {
     66 		if a.Name.Space == "xmlns" {
     67 			continue
     68 		}
     69 		// By default, xml.Decoder will fully resolve these namespaces. So if you had <foo xmlns:bar=baz bar:bin=hi/>
     70 		// then by default the second attribute would have the `Name.Space` resolved to `baz`. But we need it to
     71 		// continue to resolve as `bar` so we can easily identify it later on.
     72 		if v, ok := ns[node.Attr[i].Name.Space]; ok {
     73 			node.Attr[i].Name.Space = v
     74 		}
     75 	}
     76 	return node
     77 }
     78 
     79 // GetElement looks for the given tag name at the current level, and returns the element if found, and
     80 // skipping over non-matching elements. Returns an error if the node is not found, or if an error occurs while walking
     81 // the document.
     82 func (d NodeDecoder) GetElement(name string) (t xml.StartElement, err error) {
     83 	for {
     84 		token, done, err := d.Token()
     85 		if err != nil {
     86 			return t, err
     87 		}
     88 		if done {
     89 			return t, fmt.Errorf("%s node not found", name)
     90 		}
     91 		switch {
     92 		case strings.EqualFold(name, token.Name.Local):
     93 			return token, nil
     94 		default:
     95 			err = d.Decoder.Skip()
     96 			if err != nil {
     97 				return t, err
     98 			}
     99 		}
    100 	}
    101 }
    102 
    103 // Value provides an abstraction to retrieve char data value within an xml element.
    104 // The method will return an error if it encounters a nested xml element instead of char data.
    105 // This method should only be used to retrieve simple type or blob shape values as []byte.
    106 func (d NodeDecoder) Value() (c []byte, err error) {
    107 	t, e := d.Decoder.Token()
    108 	if e != nil {
    109 		return c, e
    110 	}
    111 
    112 	endElement := d.StartEl.End()
    113 
    114 	switch ev := t.(type) {
    115 	case xml.CharData:
    116 		c = ev.Copy()
    117 	case xml.EndElement: // end tag or self-closing
    118 		if ev == endElement {
    119 			return []byte{}, err
    120 		}
    121 		return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t)
    122 	default:
    123 		return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t)
    124 	}
    125 
    126 	t, e = d.Decoder.Token()
    127 	if e != nil {
    128 		return c, e
    129 	}
    130 
    131 	if ev, ok := t.(xml.EndElement); ok {
    132 		if ev == endElement {
    133 			return c, err
    134 		}
    135 	}
    136 
    137 	return c, fmt.Errorf("expected end element %v, got %T type %v instead", endElement, t, t)
    138 }
    139 
    140 // FetchRootElement takes in a decoder and returns the first start element within the xml body.
    141 // This function is useful in fetching the start element of an XML response and ignore the
    142 // comments and preamble
    143 func FetchRootElement(decoder *xml.Decoder) (startElement xml.StartElement, err error) {
    144 	for {
    145 		t, e := decoder.Token()
    146 		if e != nil {
    147 			return startElement, e
    148 		}
    149 
    150 		if startElement, ok := t.(xml.StartElement); ok {
    151 			return startElement, err
    152 		}
    153 	}
    154 }