commit 96ec22c55d4ec4d04176ea875dadebc9515a9fb7
parent f7df01c6c7c882635891edbc13585262f92c5717
Author: Lukas Henkel <lh@entf.net>
Date: Mon, 17 Jun 2019 22:03:10 +0200
Add htmlselect
Diffstat:
3 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-TOOLS = htmlremove htmltotext htmlunwrap
+TOOLS = htmlremove htmltotext htmlunwrap htmlselect
PREFIX = /usr/local
MANS = $(shell find . -name '*.scd' | sed s/\.scd//)
diff --git a/htmlselect/htmlselect.1.scd b/htmlselect/htmlselect.1.scd
@@ -0,0 +1,21 @@
+HTMLSELECT(1)
+
+# NAME
+
+htmlselect - print all elements matching a CSS selector from an HTML document
+
+# SYNOPSIS
+
+*htmlselect* SELECTOR [_FILE_]...
+
+# DESCRIPTION
+
+Reads each file in sequence and prints all elements matching SELECTOR to
+standard output. If no FILE is given or FILE is -, read standard input.
+
+If any FILE cannot be processed, a message prefixed with the FILE name will be
+written to standard error.
+
+# AUTHOR
+
+Lukas Henkel <lh@entf.net>
diff --git a/htmlselect/main.go b/htmlselect/main.go
@@ -0,0 +1,33 @@
+package main // import "entf.net/htmltools/htmlselect"
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/andybalholm/cascadia"
+ "golang.org/x/net/html"
+
+ "entf.net/htmltools/shared"
+)
+
+// main compiles the selector given as the first command-line argument and
+// passes the remaining arguments to shared.Main together with a callback
+// that prints the matching elements of a document (presumably invoked once
+// per parsed input; see shared.Main). It exits with status 1 when no
+// selector is given or when the selector fails to compile.
+func main() {
+	args := os.Args[1:]
+	if len(args) == 0 {
+		// Usage is a diagnostic for a failed invocation, so it goes to
+		// standard error like the selector error below; standard output
+		// stays reserved for the selected HTML.
+		fmt.Fprintln(os.Stderr, "usage: htmlselect SELECTOR [FILES...]")
+		os.Exit(1)
+	}
+	sel, err := cascadia.Compile(args[0])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "selector invalid: %v\n", err)
+		os.Exit(1)
+	}
+	shared.Main(args[1:], func(doc *html.Node) {
+		dosel(sel, doc)
+	})
+}
+
+// dosel renders every node of doc matched by sel to standard output, in the
+// order returned by sel.MatchAll. If rendering a node fails, the error is
+// reported on standard error and the remaining matches are skipped.
+func dosel(sel cascadia.Selector, doc *html.Node) {
+	for _, n := range sel.MatchAll(doc) {
+		if err := html.Render(os.Stdout, n); err != nil {
+			// A failed write to stdout will not recover for later nodes,
+			// so report it once and stop instead of ignoring it.
+			fmt.Fprintf(os.Stderr, "render: %v\n", err)
+			return
+		}
+	}
+}