# rosters.awk
#
# Scans squad-list markup (presumably HTML -- the patterns look for
# id="...", title="..." and a closing </sp... tag) and reports team-mates
# who share a birthday, i.e. the same month and day within one country.
#
# Output:
#   * for every shared birthday found: the previously stored record for
#     that country/day, the current record, then an empty line.  Records
#     are written as  country|player|date  (OFS is "|");
#   * at the end: the number of countries with at least one shared birthday.
#
# State carried between input lines:
#   c          most recent country   (set by the /headline/ rule)
#   p          most recent player    (set by the /title/ rule)
#   birthdays  (country, "-MM-DD") -> "country|player|date" of the latest
#              player seen with that birthday
#   countries  country -> number of shared-birthday hits
#
# Rule order matters: several rules rewrite $0 with sub(), and every later
# pattern is tested against the already-trimmed record.

BEGIN { OFS = "|" }

# Lines mentioning these words are dropped before any other rule sees them
# (presumably they carry title="..." attributes that are not player names
# -- TODO confirm against the input).
/Captain/  { next }
/Defender/ { next }

# Country: keep what sits between id=" and the next double quote.
/headline/ {
    sub(/^.*id="/, "")
    sub(/".*$/, "")
    c = $0
}

# Player: keep what sits between title=" and the next double quote, then
# strip a parenthesised part (greedy: first "(" through last ")").
/title/ {
    sub(/^.*title="/, "")
    sub(/".*$/, "")
    sub(/\(.*\)/, "")
    p = $0
}

# Birthday: keep what sits between bday"> and the following </sp...
/bday/ {
    sub(/^.*bday">/, "")
    sub(/<\/sp.*$/, "")
    d = $0

    # Hand-maintained date corrections (presumably the scraped value is
    # wrong for these players -- verify against the data source).  They are
    # applied to the date itself so that the key derived below has the same
    # "-MM-DD" shape as every other entry.
    if (c == "Algeria" && p ~ /Lacen/)        d = "1984-03-15"
    if (c == "Chile"   && p ~ /Manuel Rojas/) d = "1983-06-23"
    if (c == "Chile"   && p ~ /Beausejour/)   d = "1984-06-01"
    if (c == "Germany" && p ~ /Kramer/)       d = "1991-02-19"

    # Key on the last six characters of the date: "-MM-DD".
    b = substr(d, length(d) - 5)

    # This player gets an empty key, which cannot collide with a "-MM-DD"
    # key of any other player.
    if (c == "Croatia" && p ~ /Badelj/) b = ""

    # Membership test instead of comparing a looked-up value with "":
    # stored values are never empty, and "in" does not create an element.
    if ((c, b) in birthdays) {
        print birthdays[c, b]
        print c, p, d
        print ""
        countries[c]++
    }

    # Remember the latest player for this country/day.
    birthdays[c, b] = c OFS p OFS d
}

END {
    for (i in countries) pairs++
    # "+ 0" forces a numeric 0 when no country had a shared birthday;
    # an uninitialised variable would otherwise print as an empty line.
    print pairs + 0
}