htmltools.go (3081B)
1 package htmltools 2 3 import ( 4 "errors" 5 "fmt" 6 "strings" 7 8 "golang.org/x/net/html" 9 ) 10 11 var ( 12 ErrNodeIsNotADocumentNode = errors.New("Not a document node") 13 ErrNodeHasNoParent = errors.New("Node has no parent") 14 ) 15 16 type NodeMatchFunc func(*html.Node) bool 17 18 // Gets the body from an HTML document node. 19 func Body(doc *html.Node) (*html.Node, error) { 20 if doc.Type != html.DocumentNode { 21 return nil, ErrNodeIsNotADocumentNode 22 } 23 var htmln *html.Node 24 for n := doc.FirstChild; n != nil; n = n.NextSibling { 25 if n.Type == html.ElementNode && strings.ToLower(n.Data) == "html" { 26 htmln = n 27 break 28 } 29 } 30 if htmln == nil { 31 return nil, nil 32 } 33 var body *html.Node 34 for n := htmln.FirstChild; n != nil; n = n.NextSibling { 35 if strings.ToLower(n.Data) == "body" { 36 body = n 37 break 38 } 39 } 40 return body, nil 41 } 42 43 // Gets all direct children. 44 func Children(node *html.Node) []*html.Node { 45 nodes := make([]*html.Node, 0) 46 for n := node.FirstChild; n != nil; n = n.NextSibling { 47 nodes = append(nodes, n) 48 } 49 return nodes 50 } 51 52 func findRecursive(node *html.Node, nodeFunc func(*html.Node) bool, ch chan<- *html.Node) { 53 if nodeFunc == nil || nodeFunc(node) { 54 ch <- node 55 } 56 for _, c := range Children(node) { 57 findRecursive(c, nodeFunc, ch) 58 } 59 } 60 61 // Returns a channel providing all nodes that match nodeFunc recursively through 62 // the whole document. If nodeFunc is `nil`, all nodes match. 63 func FindRecursive(doc *html.Node, nodeFunc NodeMatchFunc) <-chan *html.Node { 64 ch := make(chan *html.Node) 65 go func() { 66 findRecursive(doc, nodeFunc, ch) 67 close(ch) 68 }() 69 return ch 70 } 71 72 // Returns all attribite values specified in attrs for nodes. 73 func Attr(attrs []string, nodes ...*html.Node) ([][]string, error) { 74 for i, attr := range attrs { 75 attrs[i] = strings.ToLower(attr) 76 } 77 results := make([][]string, 0) 78 for _, n := range nodes { 79 if n.Type != html.ElementNode { 80 continue 81 } 82 list := make([]string, len(attrs)) 83 var any bool 84 for i, attrn := range attrs { 85 for _, attr := range n.Attr { 86 if strings.ToLower(attr.Key) == attrn { 87 any = true 88 list[i] = attr.Val 89 } 90 } 91 } 92 if any { 93 results = append(results, list) 94 } 95 } 96 return results, nil 97 } 98 99 // Indents all headings by a certain level. 100 func IndentHeadings(level int, nodes ...*html.Node) error { 101 for _, n := range nodes { 102 switch strings.ToLower(n.Data) { 103 case "h1", "h2", "h3", "h4", "h5", "h6": 104 default: 105 continue 106 } 107 l := int(n.Data[1]) - 48 //HACK: ASCII to number 108 l += level 109 if l > 6 { 110 l = 6 111 } else if l < 1 { 112 l = 1 113 } 114 n.Data = fmt.Sprintf("h%d", l) 115 } 116 return nil 117 } 118 119 // Removes node from parent and replaces it by it's children. 120 func Unwrap(node *html.Node) error { 121 if node.Parent == nil { 122 return ErrNodeHasNoParent 123 } 124 for _, c := range Children(node) { 125 node.RemoveChild(c) 126 node.Parent.InsertBefore(c, node) 127 } 128 node.Parent.RemoveChild(node) 129 return nil 130 } 131 132 // Creates a NodeMatchFunc, matching a certain NodeType 133 func MatchNodeTypeFunc(nodeType html.NodeType) NodeMatchFunc { 134 return func(node *html.Node) bool { 135 return node.Type == nodeType 136 } 137 }