commit 5b7300d5c50c0d21715b92c83801dc41c4590c49
parent bce8ffedc776db66a88a6179a785834793bcf984
Author: Lukas Henkel <lh@entf.net>
Date: Thu, 28 Mar 2019 19:21:20 +0100
Cleaned up code and finished docs
Diffstat:
5 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+bin/
+*.1
diff --git a/Makefile b/Makefile
@@ -0,0 +1,44 @@
+# Build, install and clean the htmltools binaries and their man pages.
+#
+# Overridable on the command line:
+#   PREFIX   installation root (default /usr/local)
+#   DESTDIR  optional staging directory prepended to PREFIX (default empty)
+
+TOOLS = htmlremove htmltotext htmlunwrap
+PREFIX = /usr/local
+DESTDIR =
+# Section-1 man pages to render: one FOO.1 per FOO.1.scd found in the tree.
+# Expanded once (:=) so find runs a single time, not on every reference.
+MANS := $(patsubst %.scd,%,$(shell find . -name '*.1.scd'))
+
+# Remove a half-written man page if scdoc fails, so it is never mistaken for
+# an up-to-date target on the next run.
+.DELETE_ON_ERROR:
+
+all: $(TOOLS) $(MANS)
+
+# The tools are phony (see .PHONY), so go build is invoked on every run.
+$(TOOLS):
+	mkdir -p bin
+	go build -o bin/$@ entf.net/htmltools/$@
+
+# Render a man page from its scdoc source.
+%.1: %.1.scd
+	scdoc < $< > $@
+
+install: all
+	mkdir -p "$(DESTDIR)$(PREFIX)/bin"
+	cp $(addprefix bin/, $(TOOLS)) "$(DESTDIR)$(PREFIX)/bin/"
+	mkdir -p "$(DESTDIR)$(PREFIX)/share/man/man1"
+	cp $(MANS) "$(DESTDIR)$(PREFIX)/share/man/man1/"
+
+# $(RM) is "rm -f", so files that are already gone are not an error.
+uninstall:
+	$(RM) -- $(addprefix $(DESTDIR)$(PREFIX)/bin/, $(TOOLS))
+	$(RM) -- $(addprefix $(DESTDIR)$(PREFIX)/share/man/man1/, $(notdir $(MANS)))
+
+clean:
+	$(RM) -r bin/
+	$(RM) -- $(MANS)
+
+.PHONY: all $(TOOLS) install uninstall clean
\ No newline at end of file
diff --git a/htmlremove/htmlremove.1.scd b/htmlremove/htmlremove.1.scd
@@ -0,0 +1,22 @@
+HTMLREMOVE(1)
+
+# NAME
+
+htmlremove - remove elements from an HTML document
+
+# SYNOPSIS
+
+*htmlremove* _SELECTOR_ [_FILE_]...
+
+# DESCRIPTION
+
+Reads each file in sequence and removes all elements matching SELECTOR. Prints
+resulting documents to standard output. If no FILE is given or FILE is -, read
+standard input.
+
+If any FILE cannot be processed, a message prefixed with the FILE name will be
+written to standard error.
+
+# AUTHOR
+
+Lukas Henkel <lh@entf.net>
diff --git a/htmlremove/main.go b/htmlremove/main.go
@@ -22,11 +22,11 @@ func main() {
os.Exit(1)
}
shared.Main(args[1:], func(doc *html.Node) {
- parse(sel, doc)
+ remove(sel, doc)
})
}
-func parse(sel cascadia.Selector, doc *html.Node) {
+func remove(sel cascadia.Selector, doc *html.Node) {
for _, n := range sel.MatchAll(doc) {
n.Parent.RemoveChild(n)
}
diff --git a/htmlunwrap/htmlunwrap.1.scd b/htmlunwrap/htmlunwrap.1.scd
@@ -0,0 +1,23 @@
+HTMLUNWRAP(1)
+
+# NAME
+
+htmlunwrap - unwrap elements in an HTML document
+
+# SYNOPSIS
+
+*htmlunwrap* _SELECTOR_ [_FILE_]...
+
+# DESCRIPTION
+
+Reads each file in sequence and unwraps all elements matching SELECTOR: each
+matching element is removed and its children are added to the element's former
+parent, i.e. their grandparent. Prints resulting documents to standard output.
+If no FILE is given or FILE is -, read standard input.
+
+If any FILE cannot be processed, a message prefixed with the FILE name will be
+written to standard error.
+
+# AUTHOR
+
+Lukas Henkel <lh@entf.net>