commit 8024985b34f00f9f06197e8d6d7a914c8873d6c6
parent 1d97d4af801b78d5bb5445d5e9affd4bed1c82bf
Author: Lukas Henkel <lh@entf.net>
Date: Tue, 16 Feb 2021 20:10:58 +0100
New tool htmlattr
Diffstat:
3 files changed, 90 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-TOOLS = htmlremove htmltotext htmlunwrap htmlselect htmlindentheadings
+TOOLS = htmlremove htmltotext htmlunwrap htmlselect htmlindentheadings htmlattr
PREFIX = /usr/local
MANS = $(shell find . -name '*.scd' | sed s/\.scd//)
diff --git a/htmlattr/htmlattr.1.scd b/htmlattr/htmlattr.1.scd
@@ -0,0 +1,31 @@
+HTMLATTR(1)
+
+# NAME
+
+htmlattr - print specific attribute values from top level nodes
+
+# SYNOPSIS
+
+*htmlattr* [-fs FIELD_SEPARATOR] ATTRIBUTES [_FILE_]...
+# OPTIONS
+
+*-fs*
+ Character that is used both to split the ATTRIBUTES argument and to
+ separate the output columns. "," by default.
+
+
+# DESCRIPTION
+
+Reads each file in sequence, iterates over all top level nodes and prints
+requested attributes for each element line by line. If an element does not
+contain any attributes requested, no line is printed. If some but not all
+attributes exist on a node, the respective columns will be empty. htmlattr
+prints the same number of columns for every node. If no FILE is given or FILE
+is -, read standard input.
+
+If any FILE cannot be processed, a message prefixed with the FILE name will be
+written to standard error.
+
+# AUTHOR
+
+Lukas Henkel <lh@entf.net>
diff --git a/htmlattr/main.go b/htmlattr/main.go
@@ -0,0 +1,58 @@
+package main // import "entf.net/htmltools/htmlattr"
+
+import (
+ "flag"
+ "fmt"
+ "os"
+ "strings"
+
+ "entf.net/htmltools/shared"
+ "golang.org/x/net/html"
+)
+
+// main implements the htmlattr command line tool.
+//
+// The first positional argument is split on the -fs separator into a list of
+// attribute names, which are lowercased. The remaining arguments are handed
+// to shared.Main together with a callback that receives each document node.
+// For every element that is a direct child of <body>, the callback prints
+// the values of the requested attributes joined by the separator. Attributes
+// the element lacks yield empty columns, and elements carrying none of the
+// requested attributes are skipped entirely.
+//
+// The process exits with status 1 when no ATTRIBUTES argument is given or
+// when a document has no <body> element.
+func main() {
+	var fs string
+	flag.StringVar(&fs, "fs", ",", "field seperator")
+	flag.Parse()
+	args := flag.Args()
+	if len(args) == 0 {
+		// Usage errors go to standard error so they never end up in the
+		// column output a caller may be capturing from standard output.
+		fmt.Fprintln(os.Stderr, "usage: htmlattr [-fs FIELD_SEPERATOR] ATTRIBUTES [FILES...]")
+		os.Exit(1)
+	}
+	attrs := strings.Split(args[0], fs)
+	for i, attr := range attrs {
+		attrs[i] = strings.ToLower(attr)
+	}
+	shared.Main(args[1:], func(doc *html.Node) {
+		// The root's first child is not necessarily <html>: a doctype or a
+		// leading comment may precede it. Locate <html> and <body> by
+		// element name instead of by position.
+		body := childElement(childElement(doc, "html"), "body")
+		if body == nil {
+			fmt.Fprintln(os.Stderr, "document does not contain a body")
+			os.Exit(1)
+		}
+		for n := body.FirstChild; n != nil; n = n.NextSibling {
+			if n.Type != html.ElementNode {
+				continue
+			}
+			// One column per requested attribute; missing ones stay empty.
+			list := make([]string, len(attrs))
+			// found records whether n carries any requested attribute.
+			var found bool
+			for i, name := range attrs {
+				for _, attr := range n.Attr {
+					if strings.ToLower(attr.Key) == name {
+						found = true
+						list[i] = attr.Val
+					}
+				}
+			}
+			if found {
+				fmt.Println(strings.Join(list, fs))
+			}
+		}
+	})
+}
+
+// childElement returns the first direct child of parent that is an element
+// node whose tag name equals tag, compared case-insensitively. It returns nil
+// when parent is nil or has no such child. Non-element nodes (text, comments,
+// doctype) are never returned, even if their data happens to equal tag.
+func childElement(parent *html.Node, tag string) *html.Node {
+	if parent == nil {
+		return nil
+	}
+	for n := parent.FirstChild; n != nil; n = n.NextSibling {
+		if n.Type == html.ElementNode && strings.EqualFold(n.Data, tag) {
+			return n
+		}
+	}
+	return nil
+}