- TagName
- TagAttr
Parse HTML extract tags and attributes
examples/parse-html/parse_html.go
package main import ( "fmt" "io" "strings" "golang.org/x/net/html" ) func main() { body := `<html> <body> <h1>Main title</h1> <a href="https://code-maven.com/">Code Maven</a> <h2 id="subtitle" class="important">Some subtle title</h2> </body> </html>` reader := strings.NewReader(body) tokenizer := html.NewTokenizer(reader) for { tt := tokenizer.Next() if tt == html.ErrorToken { if tokenizer.Err() == io.EOF { return } fmt.Printf("Error: %v", tokenizer.Err()) return } tag, hasAttr := tokenizer.TagName() fmt.Printf("Tag: %v\n", string(tag)) if hasAttr { for { attrKey, attrValue, moreAttr := tokenizer.TagAttr() // if string(attrKey) == "" { // break // } fmt.Printf("Attr: %v\n", string(attrKey)) fmt.Printf("Attr: %v\n", string(attrValue)) fmt.Printf("Attr: %v\n", moreAttr) if !moreAttr { break } } } } }
Tag: html Tag: Tag: body Tag: Tag: h1 Tag: Tag: h1 Tag: Tag: a Attr: href Attr: https://code-maven.com/ Attr: false Tag: Tag: a Tag: Tag: h2 Attr: id Attr: subtitle Attr: true Attr: class Attr: important Attr: false Tag: Tag: h2 Tag: Tag: body Tag: Tag: html