#!/usr/bin/awk -f
# Extract the content of elements from XML.
#
# usage: awk -f tag.awk tag=tag.class file
#
# The "tag" variable selects the element to extract.  It may be given as
# "name" or as "name.class"; in the second form only start tags carrying
# class="class" open an element.  Without a class, the start tag must be
# exactly "<name>" (no attributes).  The default tag is "pre".
#
# For an element spread over several lines, the lines strictly between
# the start-tag line and the end-tag line are printed.  For an element
# that starts and ends on one line, the text between the tags is printed.
#
# BUG: May not handle weird nesting well,
# because end tags do not have to have the class attribute.

BEGIN {
    tag = "pre";
    printing = 0;   # nesting depth of currently open matching elements
}

# This should happen before the first line,
# but after command line definitions.
# (Command-line "tag=..." assignments are not yet applied in BEGIN, so
# the patterns are built when the first record arrives.)
NR == 1 {
    class = "";
    n = split(tag, parts, ".");
    if (n > 1) {
        tag = parts[1];
        class = parts[2];
    }

    startTag = "<" tag ">";
    if (length(class) > 0) {
        startTag = "<" tag "[^>]*class *= *\"" class "\"[^>]*>";
    }
    # A plain "/" needs no escaping inside a string used as a dynamic
    # regex; "\/" in a string literal is undefined in POSIX awk.
    endTag = "</" tag ">";
}

# For a "one liner": start and end tag on the same line.
$0 ~ startTag && $0 ~ endTag {
    if (printing > 0) {
        # Already inside an open element: the line is content, keep it
        # verbatim, exactly like any other line in the element.
        print;
    } else {
        # Text after the first start tag, cut at the first end tag.
        split($0, afterStart, startTag);
        split(afterStart[2], beforeEnd, endTag);
        print beforeEnd[1];
    }
    next;
}

# Rule order matters below: the end-tag rule runs before the print rule
# and the start-tag rule after it, so the tag lines themselves are never
# printed.

$0 ~ endTag {
    if (printing > 0) {
        printing--;
    }
}

{
    if (printing > 0) {
        print;
    }
}

$0 ~ startTag {
    printing++;
}