Plan 9 from Bell Labs’s /usr/web/sources/contrib/sl/rc/hgrab

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#!/bin/rc
# hgrab url - print rc commands that would fetch the files linked from url.
#
# The page at url is fetched with hget, passed through uhtml, and joined
# into a single line.  sed then rewrites every href=, src= or link=
# attribute value onto its own line, prefixed with the marker XXX, and awk
# turns each marked line into an hget command (preceded by mkdir -p when
# the target lies in a subdirectory).  Nothing is downloaded here: the
# commands are only written to standard output, sorted and de-duplicated.
#
# Links containing ?, : or ; are emitted as commented-out commands, since
# no local file name is derived for them.
#
# Notes on the safety measures below:
#  - the base url reaches awk through -v instead of being spliced into the
#    program text, so a double quote in the url cannot alter the program
#    (awk may still interpret backslash escapes in the value);
#  - every word printed into a generated command goes through q(), which
#    applies rc quoting, so a quote character in a link cannot end the
#    quoted word and smuggle in extra commands.

~ $#* 1 || {
	echo 'usage: hgrab url' >[1=2]
	exit usage
}

hget $1 | uhtml | tr -d '
' | sed '
s!>[^<]*<!><!g
s!^XXX!!g
s!(href|src|link)\=''([^'']+)''!\nXXX \2\n!g
s!(href|src|link)\="([^"]+)"!\nXXX \2\n!g
s!(href|src|link)\=([^ 	>]+)!\nXXX \2\n!g' |
awk -v 'b='$1 '
# q: return s as one rc-quoted word, doubling every embedded quote
function q(s) {
	gsub(/''/, "''''", s)
	return "''" s "''"
}
/^XXX/ {
	# drop the 4-character "XXX " marker
	u=substr($0, 5)
	# schemes that hget is not asked to fetch
	if(match(u, /^(mailto|javascript|file|ftp|gopher):/))
		next
	# the link is a prefix of the base url (the page itself or a parent)
	if(index(b, u) == 1)
		next
	# make links that start with the base url relative to it
	if(index(u, b) == 1)
		u=substr(u, 1+length(b))
	# no local file name is derived for these; print a commented command
	if(match(u, /[\?:;]/)){
		printf "# hget -b %s %s\n", q(b), q(u)
		next
	}
	# derive the local file name: strip the fragment, map a trailing
	# slash to index.html, and drop a leading slash
	f=u
	sub(/#.*$/, "", f)
	# a fragment-only link names no file; skip it rather than
	# emit a redirection to an empty file name
	if(f == "")
		next
	sub(/\/$/, "/index.html", f)
	sub(/^\//, "", f)
	if(index(f, "/")){
		# create the directory part before the download
		d=f
		sub(/\/([^\/]+)$/, "", d);
		printf "mkdir -p %s; ", q(d)
	}
	printf "hget -b %s %s >%s\n", q(b), q(u), q(f)
}' | sort | uniq

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].