htmltools

Various command line tools to transform HTML documents
git clone git://git.entf.net/htmltools
Log | Files | Refs | README | LICENSE

commit 5b7300d5c50c0d21715b92c83801dc41c4590c49
parent bce8ffedc776db66a88a6179a785834793bcf984
Author: Lukas Henkel <lh@entf.net>
Date:   Thu, 28 Mar 2019 19:21:20 +0100

Cleaned up code and finished docs

Diffstat:
A .gitignore | 2 ++
A Makefile | 29 +++++++++++++++++++++++++++++
A htmlremove/htmlremove.1.scd | 22 ++++++++++++++++++++++
M htmlremove/main.go | 4 ++--
A htmlunwrap/htmlunwrap.1.scd | 23 +++++++++++++++++++++++
5 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,2 @@ +bin/ +*.1 diff --git a/Makefile b/Makefile @@ -0,0 +1,28 @@ +TOOLS = htmlremove htmltotext htmlunwrap +PREFIX = /usr/local +MANS = $(shell find . -name '*.scd' | sed s/\.scd//) + +all: $(TOOLS) $(MANS) + +$(TOOLS): + mkdir -p bin + go build -o bin/$@ entf.net/htmltools/$@ + +%.1: %.1.scd + scdoc < $< > $@ + +install: all + mkdir -p "$(PREFIX)/bin" + cp $(addprefix bin/, $(TOOLS)) "$(PREFIX)/bin/" + mkdir -p "$(PREFIX)/share/man/man1" + cp $(MANS) "$(PREFIX)/share/man/man1/" + +uninstall: + -rm -- $(addprefix $(PREFIX)/bin/, $(TOOLS)) + -rm -- $(addprefix $(PREFIX)/share/man/man1/, $(notdir $(MANS))) + +clean: + -rm -r bin/ + -rm -- $(MANS) + +.PHONY: all $(TOOLS) install uninstall clean+ \ No newline at end of file diff --git a/htmlremove/htmlremove.1.scd b/htmlremove/htmlremove.1.scd @@ -0,0 +1,22 @@ +HTMLREMOVE(1) + +# NAME + +htmlremove - remove elements from an HTML document + +# SYNOPSIS + +*htmlremove* SELECTOR [_FILE_]... + +# DESCRIPTION + +Reads each file in sequence and removes all elements matching SELECTOR. Prints +resulting documents to standard output. If no FILE is given or FILE is -, read +standard input. + +If any FILE cannot be processed, a message prefixed with the FILE name will be +written to standard error. + +# AUTHOR + +Lukas Henkel <lh@entf.net> diff --git a/htmlremove/main.go b/htmlremove/main.go @@ -22,11 +22,11 @@ func main() { os.Exit(1) } shared.Main(args[1:], func(doc *html.Node) { - parse(sel, doc) + remove(sel, doc) }) } -func parse(sel cascadia.Selector, doc *html.Node) { +func remove(sel cascadia.Selector, doc *html.Node) { for _, n := range sel.MatchAll(doc) { n.Parent.RemoveChild(n) } diff --git a/htmlunwrap/htmlunwrap.1.scd b/htmlunwrap/htmlunwrap.1.scd @@ -0,0 +1,23 @@ +HTMLUNWRAP(1) + +# NAME + +htmlunwrap - unwrap elements in an HTML document + +# SYNOPSIS + +*htmlunwrap* SELECTOR [_FILE_]... 
+ +# DESCRIPTION + +Reads each file in sequence and removes all elements matching SELECTOR while +keeping their children: the children of each matched element are moved into +that element's parent, thus unwrapping them from their previous parent element. +Prints resulting documents to standard output. If no FILE is given or FILE is -, read standard input. + +If any FILE cannot be processed, a message prefixed with the FILE name will be +written to standard error. + +# AUTHOR + +Lukas Henkel <lh@entf.net>