commit c4f4270f81fb5506346c1fbb474f1e5939634918
parent acb1b50c425833e1e92883c20c663f02b175a065
Author: Lukas Henkel <lh@entf.net>
Date: Tue, 16 Feb 2021 20:43:32 +0100
More idiomatic project structure
Diffstat:
21 files changed, 321 insertions(+), 314 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,28 +1,35 @@
-TOOLS = htmlremove htmltotext htmlunwrap htmlselect htmlindentheadings htmlattr
+VPATH = doc
PREFIX = /usr/local
-MANS = $(shell find . -name '*.scd' | sed s/\.scd//)
-all: $(TOOLS) $(MANS)
+TOOLS := \
+ htmlattr \
+ htmlindentheadings \
+ htmlremove \
+ htmlselect \
+ htmltotext \
+ htmlunwrap
+DOCS := $(addsuffix .1, $(TOOLS))
-$(TOOLS):
- mkdir -p bin
- go build -o bin/$@ entf.net/htmltools/$@
+SRC := $(shell find . -name "*.go")
+
+all: $(TOOLS) $(DOCS)
+
+$(TOOLS): $(SRC)
+ go build entf.net/htmltools/cmd/$@
%.1: %.1.scd
scdoc < $< > $@
install: all
- mkdir -p "$(PREFIX)/bin"
- cp $(addprefix bin/, $(TOOLS)) "$(PREFIX)/bin/"
- mkdir -p "$(PREFIX)/share/man/man1"
- cp $(MANS) "$(PREFIX)/share/man/man1/"
+ install -Dm755 $(TOOLS) -t "$(PREFIX)/bin/"
+ install -Dm644 $(DOCS) -t "$(PREFIX)/share/man/man1/"
uninstall:
-rm -- $(addprefix $(PREFIX)/bin/, $(TOOLS))
- -rm -- $(addprefix $(PREFIX)/share/man/man1/, $(notdir $(MANS)))
+ -rm -- $(addprefix $(PREFIX)/share/man/man1/, $(DOCS))
clean:
- -rm -r bin/
- -rm -- $(MANS)
+ -rm -- $(TOOLS)
+ -rm -- $(DOCS)
-.PHONY: all $(TOOLS) install uninstall clean-
\ No newline at end of file
+.PHONY: all install uninstall clean+
\ No newline at end of file
diff --git a/cmd/htmlattr/main.go b/cmd/htmlattr/main.go
@@ -0,0 +1,81 @@
+// Command htmlattr prints selected attribute values of the elements that are
+// direct children of <body>, one line per element.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"strings"
+
+	"entf.net/htmltools"
+	"golang.org/x/net/html"
+)
+
+func main() {
+	var fs string
+	flag.StringVar(&fs, "fs", ",", "field seperator")
+	flag.Parse()
+	args := flag.Args()
+	if len(args) == 0 {
+		fmt.Println("usage: htmlattr [-fs FIELD_SEPERATOR] ATTRIBUTES [FILES...]")
+		os.Exit(1)
+	}
+	// ATTRIBUTES is a single argument split on the field separator; names are
+	// matched case-insensitively.
+	attrs := strings.Split(args[0], fs)
+	for i, attr := range attrs {
+		attrs[i] = strings.ToLower(attr)
+	}
+	htmltools.Main(args[1:], func(doc *html.Node) {
+		body := findBody(doc)
+		if body == nil {
+			fmt.Fprintln(os.Stderr, "document does not contain a body")
+			os.Exit(1)
+		}
+		for n := body.FirstChild; n != nil; n = n.NextSibling {
+			if n.Type != html.ElementNode {
+				continue
+			}
+			printAttrs(n, attrs, fs)
+		}
+	})
+}
+
+// findBody returns the <body> element of doc, or nil if there is none.
+//
+// The <html> element is looked up among all children of the document node
+// rather than assumed to be the first one, because a doctype or a leading
+// comment precedes it in the parsed tree.
+func findBody(doc *html.Node) *html.Node {
+	for root := doc.FirstChild; root != nil; root = root.NextSibling {
+		if root.Type != html.ElementNode || !strings.EqualFold(root.Data, "html") {
+			continue
+		}
+		for n := root.FirstChild; n != nil; n = n.NextSibling {
+			if n.Type == html.ElementNode && strings.EqualFold(n.Data, "body") {
+				return n
+			}
+		}
+	}
+	return nil
+}
+
+// printAttrs writes the values of the requested attributes of n, joined by
+// sep, as one line on stdout. Attributes that n does not have yield empty
+// fields; nothing is printed when n has none of them.
+func printAttrs(n *html.Node, attrs []string, sep string) {
+	fields := make([]string, len(attrs))
+	found := false
+	for i, want := range attrs {
+		for _, attr := range n.Attr {
+			if strings.ToLower(attr.Key) == want {
+				found = true
+				fields[i] = attr.Val
+			}
+		}
+	}
+	if found {
+		fmt.Println(strings.Join(fields, sep))
+	}
+}
diff --git a/cmd/htmlindentheadings/main.go b/cmd/htmlindentheadings/main.go
@@ -0,0 +1,62 @@
+// Command htmlindentheadings shifts every h1-h6 heading by INDENT_LEVELS
+// levels and writes the resulting document to stdout.
+package main
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+
+	"golang.org/x/net/html"
+
+	"entf.net/htmltools"
+)
+
+const usage = "usage: htmlindentheadings INDENT_LEVELS [FILES...]"
+
+func main() {
+	args := os.Args[1:]
+	if len(args) == 0 {
+		fmt.Println(usage)
+		os.Exit(1)
+	}
+	lvls, err := strconv.Atoi(args[0])
+	if err != nil {
+		fmt.Println(usage)
+		os.Exit(1)
+	}
+	htmltools.Main(args[1:], func(doc *html.Node) {
+		visit(lvls, doc)
+		if err := html.Render(os.Stdout, doc); err != nil {
+			htmltools.LogErr(err)
+		}
+	})
+}
+
+// indent returns the heading tag that lies lvls levels below tag, which must
+// be one of "h1" to "h6". The result is clamped to the valid range h1-h6, so
+// a negative lvls can never produce a tag such as "h0".
+func indent(lvls int, tag string) string {
+	l := int(tag[1]-'0') + lvls
+	switch {
+	case l < 1:
+		l = 1
+	case l > 6:
+		l = 6
+	}
+	return "h" + strconv.Itoa(l)
+}
+
+// visit walks the tree rooted at n and renames every heading element by
+// passing its tag through indent.
+func visit(lvls int, n *html.Node) {
+	if n.Type == html.ElementNode {
+		switch n.Data {
+		case "h1", "h2", "h3", "h4", "h5", "h6":
+			n.Data = indent(lvls, n.Data)
+		}
+	}
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		visit(lvls, c)
+	}
+}
diff --git a/cmd/htmlremove/main.go b/cmd/htmlremove/main.go
@@ -0,0 +1,43 @@
+// Command htmlremove deletes every element matching a CSS selector and writes
+// the remaining document to stdout.
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/andybalholm/cascadia"
+	"golang.org/x/net/html"
+
+	"entf.net/htmltools"
+)
+
+func main() {
+	args := os.Args[1:]
+	if len(args) == 0 {
+		fmt.Println("usage: htmlremove SELECTOR [FILES...]")
+		os.Exit(1)
+	}
+	sel, err := cascadia.Compile(args[0])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "selector invalid: %v\n", err)
+		os.Exit(1)
+	}
+	htmltools.Main(args[1:], func(doc *html.Node) {
+		remove(sel, doc)
+	})
+}
+
+// remove detaches every node matched by sel from doc and renders what is left
+// to stdout. A rendering failure is reported through htmltools.LogErr instead
+// of being dropped silently.
+func remove(sel cascadia.Selector, doc *html.Node) {
+	for _, n := range sel.MatchAll(doc) {
+		if n.Parent != nil {
+			n.Parent.RemoveChild(n)
+		}
+	}
+	if err := html.Render(os.Stdout, doc); err != nil {
+		htmltools.LogErr(err)
+	}
+}
diff --git a/cmd/htmlselect/main.go b/cmd/htmlselect/main.go
@@ -0,0 +1,48 @@
+// Command htmlselect prints every element matching a CSS selector, one
+// rendered element per line.
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/andybalholm/cascadia"
+	"golang.org/x/net/html"
+
+	"entf.net/htmltools"
+)
+
+func main() {
+	args := os.Args[1:]
+	if len(args) == 0 {
+		fmt.Println("usage: htmlselect SELECTOR [FILES...]")
+		os.Exit(1)
+	}
+	sel, err := cascadia.Compile(args[0])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "selector invalid: %v\n", err)
+		os.Exit(1)
+	}
+	htmltools.Main(args[1:], func(doc *html.Node) {
+		dosel(sel, doc)
+	})
+}
+
+// dosel renders each node matched by sel and prints it on a single line:
+// newlines inside the markup become spaces and surrounding whitespace is
+// trimmed. A node that fails to render is reported through htmltools.LogErr
+// and skipped instead of being printed half-rendered.
+func dosel(sel cascadia.Selector, doc *html.Node) {
+	var buf bytes.Buffer
+	for _, n := range sel.MatchAll(doc) {
+		buf.Reset()
+		if err := html.Render(&buf, n); err != nil {
+			htmltools.LogErr(err)
+			continue
+		}
+		l := strings.ReplaceAll(buf.String(), "\n", " ")
+		fmt.Println(strings.TrimSpace(l))
+	}
+}
diff --git a/cmd/htmltotext/main.go b/cmd/htmltotext/main.go
@@ -0,0 +1,33 @@
+// Command htmltotext prints the text content of HTML documents, one
+// non-blank text node per line.
+package main
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"golang.org/x/net/html"
+
+	"entf.net/htmltools"
+)
+
+func main() {
+	htmltools.Main(os.Args[1:], visit)
+}
+
+// visit prints the whitespace-trimmed data of every non-blank text node in
+// the tree rooted at n, in document order.
+func visit(n *html.Node) {
+	if n.Type == html.TextNode {
+		text := strings.TrimSpace(n.Data)
+		if text != "" {
+			fmt.Println(text)
+		}
+	}
+	child := n.FirstChild
+	for child != nil {
+		visit(child)
+		child = child.NextSibling
+	}
+}
diff --git a/cmd/htmlunwrap/main.go b/cmd/htmlunwrap/main.go
@@ -0,0 +1,51 @@
+// Command htmlunwrap replaces every element matching a CSS selector with its
+// children and writes the resulting document to stdout.
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/andybalholm/cascadia"
+	"golang.org/x/net/html"
+
+	"entf.net/htmltools"
+)
+
+func main() {
+	args := os.Args[1:]
+	if len(args) == 0 {
+		fmt.Println("usage: htmlunwrap SELECTOR [FILES...]")
+		os.Exit(1)
+	}
+	sel, err := cascadia.Compile(args[0])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "selector invalid: %v\n", err)
+		os.Exit(1)
+	}
+	htmltools.Main(args[1:], func(doc *html.Node) {
+		unwrap(sel, doc)
+	})
+}
+
+// unwrap replaces every node matched by sel with its own children, keeping
+// their order, and renders the resulting document to stdout. A rendering
+// failure is reported through htmltools.LogErr.
+func unwrap(sel cascadia.Selector, doc *html.Node) {
+	for _, n := range sel.MatchAll(doc) {
+		parent := n.Parent
+		if parent == nil {
+			continue
+		}
+		// Re-read n.FirstChild each round: detaching a child clears its sibling
+		// links, so the next child has to come from n itself.
+		for c := n.FirstChild; c != nil; c = n.FirstChild {
+			n.RemoveChild(c)
+			parent.InsertBefore(c, n)
+		}
+		parent.RemoveChild(n)
+	}
+	if err := html.Render(os.Stdout, doc); err != nil {
+		htmltools.LogErr(err)
+	}
+}
diff --git a/htmlattr/htmlattr.1.scd b/doc/htmlattr.1.scd
diff --git a/htmlindentheadings/htmlindentheadings.1.scd b/doc/htmlindentheadings.1.scd
diff --git a/htmlremove/htmlremove.1.scd b/doc/htmlremove.1.scd
diff --git a/htmlselect/htmlselect.1.scd b/doc/htmlselect.1.scd
diff --git a/htmltotext/htmltotext.1.scd b/doc/htmltotext.1.scd
diff --git a/htmlunwrap/htmlunwrap.1.scd b/doc/htmlunwrap.1.scd
diff --git a/htmlattr/main.go b/htmlattr/main.go
@@ -1,58 +0,0 @@
-package main // import "entf.net/htmltools/htmlattr"
-
-import (
- "flag"
- "fmt"
- "os"
- "strings"
-
- "entf.net/htmltools/shared"
- "golang.org/x/net/html"
-)
-
-func main() {
- var fs string
- flag.StringVar(&fs, "fs", ",", "field seperator")
- flag.Parse()
- args := flag.Args()
- if len(args) == 0 {
- fmt.Println("usage: htmlattr [-fs FIELD_SEPERATOR] ATTRIBUTES [FILES...]")
- os.Exit(1)
- }
- attrs := strings.Split(args[0], fs)
- for i, attr := range attrs {
- attrs[i] = strings.ToLower(attr)
- }
- shared.Main(args[1:], func(doc *html.Node) {
- var body *html.Node
- for n := doc.FirstChild.FirstChild; n != nil; n = n.NextSibling {
- if strings.ToLower(n.Data) == "body" {
- body = n
- break
- }
- }
- if body == nil {
- fmt.Fprintln(os.Stderr, "document does not contain a body")
- os.Exit(1)
- }
- for n := body.FirstChild; n != nil; n = n.NextSibling {
- if n.Type != html.ElementNode {
- continue
- }
- list := make([]string, len(attrs))
- var any bool
- for i, attrn := range attrs {
- for _, attr := range n.Attr {
- if strings.ToLower(attr.Key) == attrn {
- any = true
- list[i] = attr.Val
- }
- }
- }
- line := strings.Join(list, fs)
- if any {
- fmt.Println(line)
- }
- }
- })
-}
diff --git a/htmlindentheadings/main.go b/htmlindentheadings/main.go
@@ -1,51 +0,0 @@
-package main // import "entf.net/htmltools/htmlindentheadings"
-
-import (
- "fmt"
- "os"
- "strconv"
-
- "golang.org/x/net/html"
-
- "entf.net/htmltools/shared"
-)
-
-const usage = "usage: htmlindentheadings INDENT_LEVELS [FILES...]"
-
-func main() {
- args := os.Args[1:]
- if len(args) == 0 {
- fmt.Println(usage)
- os.Exit(1)
- }
- lvls, err := strconv.Atoi(args[0])
- if err != nil {
- fmt.Println(usage)
- os.Exit(1)
- }
- shared.Main(args[1:], func(doc *html.Node) {
- visit(lvls, doc)
- html.Render(os.Stdout, doc)
- })
-}
-
-func indent(lvls int, tag string) string {
- l := int(tag[1]) - 48
- l += lvls
- if l > 6 {
- l = 6
- }
- return fmt.Sprintf("h%d", l)
-}
-
-func visit(lvls int, n *html.Node) {
- if n.Type == html.ElementNode {
- switch n.Data {
- case "h1", "h2", "h3", "h4", "h5", "h6":
- n.Data = indent(lvls, n.Data)
- }
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- visit(lvls, c)
- }
-}
diff --git a/htmlremove/main.go b/htmlremove/main.go
@@ -1,34 +0,0 @@
-package main // import "entf.net/htmltools/htmlremove"
-
-import (
- "fmt"
- "os"
-
- "github.com/andybalholm/cascadia"
- "golang.org/x/net/html"
-
- "entf.net/htmltools/shared"
-)
-
-func main() {
- args := os.Args[1:]
- if len(args) == 0 {
- fmt.Println("usage: htmlremove SELECTOR [FILES...]")
- os.Exit(1)
- }
- sel, err := cascadia.Compile(args[0])
- if err != nil {
- fmt.Fprintf(os.Stderr, "selector invalid: %v\n", err)
- os.Exit(1)
- }
- shared.Main(args[1:], func(doc *html.Node) {
- remove(sel, doc)
- })
-}
-
-func remove(sel cascadia.Selector, doc *html.Node) {
- for _, n := range sel.MatchAll(doc) {
- n.Parent.RemoveChild(n)
- }
- html.Render(os.Stdout, doc)
-}
diff --git a/htmlselect/main.go b/htmlselect/main.go
@@ -1,40 +0,0 @@
-package main // import "entf.net/htmltools/htmlselect"
-
-import (
- "bytes"
- "fmt"
- "os"
- "strings"
-
- "github.com/andybalholm/cascadia"
- "golang.org/x/net/html"
-
- "entf.net/htmltools/shared"
-)
-
-func main() {
- args := os.Args[1:]
- if len(args) == 0 {
- fmt.Println("usage: htmlselect SELECTOR [FILES...]")
- os.Exit(1)
- }
- sel, err := cascadia.Compile(args[0])
- if err != nil {
- fmt.Fprintf(os.Stderr, "selector invalid: %v\n", err)
- os.Exit(1)
- }
- shared.Main(args[1:], func(doc *html.Node) {
- dosel(sel, doc)
- })
-}
-
-func dosel(sel cascadia.Selector, doc *html.Node) {
- for _, n := range sel.MatchAll(doc) {
- buf := &bytes.Buffer{}
- html.Render(buf, n)
- l := buf.String()
- l = strings.ReplaceAll(l, "\n", " ")
- l = strings.TrimSpace(l)
- fmt.Println(l)
- }
-}
diff --git a/htmltotext/main.go b/htmltotext/main.go
@@ -1,26 +0,0 @@
-package main // import "entf.net/htmltools/htmltotext"
-
-import (
- "fmt"
- "os"
- "strings"
-
- "golang.org/x/net/html"
-
- "entf.net/htmltools/shared"
-)
-
-func main() {
- shared.Main(os.Args[1:], visit)
-}
-
-func visit(n *html.Node) {
- if n.Type == html.TextNode {
- if t := strings.TrimSpace(n.Data); t != "" {
- fmt.Println(t)
- }
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- visit(c)
- }
-}
diff --git a/htmlunwrap/main.go b/htmlunwrap/main.go
@@ -1,42 +0,0 @@
-package main // import "entf.net/htmltools/htmlunwrap"
-
-import (
- "fmt"
- "os"
-
- "github.com/andybalholm/cascadia"
- "golang.org/x/net/html"
-
- "entf.net/htmltools/shared"
-)
-
-func main() {
- args := os.Args[1:]
- if len(args) == 0 {
- fmt.Println("usage: htmlremove SELECTOR [FILES...]")
- os.Exit(1)
- }
- sel, err := cascadia.Compile(args[0])
- if err != nil {
- fmt.Fprintf(os.Stderr, "selector invalid: %v\n", err)
- os.Exit(1)
- }
- shared.Main(args[1:], func(doc *html.Node) {
- unwrap(sel, doc)
- })
-}
-
-func unwrap(sel cascadia.Selector, doc *html.Node) {
- for _, n := range sel.MatchAll(doc) {
- cs := make([]*html.Node, 0)
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- cs = append(cs, c)
- }
- for _, c := range cs {
- n.RemoveChild(c)
- n.Parent.InsertBefore(c, n)
- }
- n.Parent.RemoveChild(n)
- }
- html.Render(os.Stdout, doc)
-}
diff --git a/shared.go b/shared.go
@@ -0,0 +1,67 @@
+// Package htmltools holds the command-line plumbing shared by the html*
+// tools: opening inputs, parsing them and reporting errors.
+package htmltools
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"golang.org/x/net/html"
+)
+
+// currentFile names the input being processed; LogErr prefixes its output
+// with it.
+var currentFile string
+
+// readerFromFile returns a reader for file, where "-" selects stdin, and
+// records the name of the input in currentFile.
+func readerFromFile(file string) (io.Reader, error) {
+	if file == "-" {
+		currentFile = "[stdin]"
+		return os.Stdin, nil
+	}
+	currentFile = file
+	f, err := os.Open(file)
+	if err != nil {
+		// Plain nil, not a nil *os.File wrapped in a non-nil interface.
+		return nil, err
+	}
+	return f, nil
+}
+
+// LogErr writes err to stderr, prefixed with the name of the current input.
+func LogErr(err error) {
+	fmt.Fprintf(os.Stderr, "%s: %v\n", currentFile, err)
+}
+
+// parseFile opens the input called name and parses it as HTML. A file opened
+// here is closed again before returning; stdin is left open.
+func parseFile(name string) (*html.Node, error) {
+	r, err := readerFromFile(name)
+	if err != nil {
+		return nil, err
+	}
+	if f, ok := r.(*os.File); ok && f != os.Stdin {
+		defer f.Close()
+	}
+	return html.Parse(r)
+}
+
+// Main parses every input named in args and passes each parsed document to
+// handleFunc. With no args, stdin is read; the name "-" also means stdin.
+// An input that cannot be opened or parsed is reported through LogErr and
+// skipped, and processing continues with the next one.
+func Main(args []string, handleFunc func(*html.Node)) {
+	if len(args) == 0 {
+		args = []string{"-"}
+	}
+	for _, a := range args {
+		doc, err := parseFile(a)
+		if err != nil {
+			LogErr(err)
+			continue
+		}
+		handleFunc(doc)
+	}
+}
diff --git a/shared/shared.go b/shared/shared.go
@@ -1,48 +0,0 @@
-package shared
-
-import (
- "fmt"
- "io"
- "os"
-
- "golang.org/x/net/html"
-)
-
-var currentFile string
-
-func readerFromFile(file string) (f io.Reader, err error) {
- if file == "-" {
- currentFile = "[stdin]"
- f = os.Stdin
- } else {
- currentFile = file
- f, err = os.Open(file)
- if err != nil {
- return
- }
- }
- return
-}
-
-func LogErr(err error) {
- fmt.Fprintf(os.Stderr, "%s: %v\n", currentFile, err)
-}
-
-func Main(args []string, handleFunc func(*html.Node)) {
- if len(args) == 0 {
- args = append(args, "-")
- }
- for _, a := range args {
- f, err := readerFromFile(a)
- if err != nil {
- LogErr(err)
- continue
- }
- doc, err := html.Parse(f)
- if err != nil {
- LogErr(err)
- return
- }
- handleFunc(doc)
- }
-}