htmltools

Various command line tools to transform HTML documents
git clone git://git.entf.net/htmltools
Log | Files | Refs | README | LICENSE

commit 96ec22c55d4ec4d04176ea875dadebc9515a9fb7
parent f7df01c6c7c882635891edbc13585262f92c5717
Author: Lukas Henkel <lh@entf.net>
Date:   Mon, 17 Jun 2019 22:03:10 +0200

Add htmlselect

Diffstat:
M Makefile | 2 +-
A htmlselect/htmlselect.1.scd | 21 +++++++++++++++++++++
A htmlselect/main.go | 33 +++++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-TOOLS = htmlremove htmltotext htmlunwrap
+TOOLS = htmlremove htmltotext htmlunwrap htmlselect
 PREFIX = /usr/local
 MANS = $(shell find . -name '*.scd' | sed s/\.scd//)
 
diff --git a/htmlselect/htmlselect.1.scd b/htmlselect/htmlselect.1.scd
@@ -0,0 +1,21 @@
+HTMLREMOVE(1)
+
+# NAME
+
+htmlselect - print all elements matching a selector from an HTML document
+
+# SYNOPSIS
+
+*htmlselect* SELECTOR [_FILE_]...
+
+# DESCRIPTION
+
+Reads each file in sequence and prints all elements matching SELECTOR to
+standard output. If no FILE is given or FILE is -, read standard input.
+
+If any FILE cannot be processed, a message prefixed with the FILE name will be
+written to standard error.
+
+# AUTHOR
+
+Lukas Henkel <lh@entf.net>
diff --git a/htmlselect/main.go b/htmlselect/main.go
@@ -0,0 +1,33 @@
+package main // import "entf.net/htmltools/htmlselect"
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/andybalholm/cascadia"
+	"golang.org/x/net/html"
+
+	"entf.net/htmltools/shared"
+)
+
+func main() {
+	args := os.Args[1:]
+	if len(args) == 0 {
+		fmt.Println("usage: htmlselect SELECTOR [FILES...]")
+		os.Exit(1)
+	}
+	sel, err := cascadia.Compile(args[0])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "selector invalid: %v\n", err)
+		os.Exit(1)
+	}
+	shared.Main(args[1:], func(doc *html.Node) {
+		dosel(sel, doc)
+	})
+}
+
+func dosel(sel cascadia.Selector, doc *html.Node) {
+	for _, n := range sel.MatchAll(doc) {
+		html.Render(os.Stdout, n)
+	}
+}