htmltools

Various command line tools to transform HTML documents
git clone git://git.entf.net/htmltools
Log | Files | Refs | README | LICENSE

commit 8024985b34f00f9f06197e8d6d7a914c8873d6c6
parent 1d97d4af801b78d5bb5445d5e9affd4bed1c82bf
Author: Lukas Henkel <lh@entf.net>
Date:   Tue, 16 Feb 2021 20:10:58 +0100

New tool htmlattr

Diffstat:
MMakefile | 2+-
Ahtmlattr/htmlattr.1.scd | 31+++++++++++++++++++++++++++++++
Ahtmlattr/main.go | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile @@ -1,4 +1,4 @@ -TOOLS = htmlremove htmltotext htmlunwrap htmlselect htmlindentheadings +TOOLS = htmlremove htmltotext htmlunwrap htmlselect htmlindentheadings htmlattr PREFIX = /usr/local MANS = $(shell find . -name '*.scd' | sed s/\.scd//) diff --git a/htmlattr/htmlattr.1.scd b/htmlattr/htmlattr.1.scd @@ -0,0 +1,31 @@ +HTMLATTR(1) + +# NAME + +htmlattr - print specific attribute values from top level nodes + +# SYNOPSIS + +*htmlattr* [-fs FIELD_SEPARATOR] ATTRIBUTES [_FILE_]... +# OPTIONS + +*-fs* + character that will be used to both split the ATTRIBUTES argument and + separate the output columns. "," by default. + + +# DESCRIPTION + +Reads each file in sequence, iterates over all top level nodes and prints +requested attributes for each element line by line. If an element does not +contain any of the requested attributes, no line is printed. If some but not all +attributes exist on a node, the respective columns will be empty. htmlattr +prints the same number of columns for every node. If no FILE is given or FILE +is -, read standard input. + +If any FILE cannot be processed, a message prefixed with the FILE name will be +written to standard error. 
+ +# AUTHOR + +Lukas Henkel <lh@entf.net> diff --git a/htmlattr/main.go b/htmlattr/main.go @@ -0,0 +1,58 @@ +package main // import "entf.net/htmltools/htmlattr" + +import ( + "flag" + "fmt" + "os" + "strings" + + "entf.net/htmltools/shared" + "golang.org/x/net/html" +) + +func main() { + var fs string + flag.StringVar(&fs, "fs", ",", "field seperator") + flag.Parse() + args := flag.Args() + if len(args) == 0 { + fmt.Println("usage: htmlattr [-fs FIELD_SEPERATOR] ATTRIBUTES [FILES...]") + os.Exit(1) + } + attrs := strings.Split(args[0], fs) + for i, attr := range attrs { + attrs[i] = strings.ToLower(attr) + } + shared.Main(args[1:], func(doc *html.Node) { + var body *html.Node + for n := doc.FirstChild.FirstChild; n != nil; n = n.NextSibling { + if strings.ToLower(n.Data) == "body" { + body = n + break + } + } + if body == nil { + fmt.Fprintln(os.Stderr, "document does not contain a body") + os.Exit(1) + } + for n := body.FirstChild; n != nil; n = n.NextSibling { + if n.Type != html.ElementNode { + continue + } + list := make([]string, len(attrs)) + var any bool + for i, attrn := range attrs { + for _, attr := range n.Attr { + if strings.ToLower(attr.Key) == attrn { + any = true + list[i] = attr.Val + } + } + } + line := strings.Join(list, fs) + if any { + fmt.Println(line) + } + } + }) +}