# Private helper: locate the next match of regexp in s and refine it
# according to criteria k.
#   k = 0  maximal (leftmost-longest, as returned by match())
#   k = 1  trim end   (shrink the match from its right-hand side)
#   k = 2  trim start (shrink the match from its left-hand side)
#   k = 3  minimal    (trim end, then trim start)
# On success stores the 1-based start in pos["start"] and the length in
# pos["len"] and returns 1.  Returns 0 when there is no match, or when
# the only match is zero-length (a zero-length match can never advance
# the caller's scan, so it is treated as "no further match").
function _nextmatch(s, k, regexp, pos,    part, rs, rl) {
    if (match(s, regexp) == 0)
        return 0;
    rs = RSTART;
    rl = RLENGTH;

    if (k % 2) {
        # Trim end: re-match inside the current match minus its last
        # character.  Each round strictly shortens rl, and the rl > 1
        # guard stops regexps that match the empty string from spinning.
        part = substr(s, rs, rl - 1);
        while (rl > 1 && match(part, regexp) != 0) {
            rs += RSTART - 1;
            rl = RLENGTH;
            part = substr(s, rs, rl - 1);
        }
    }

    if (k % 4 > 1) {
        # Trim start: re-match inside the current match minus its first
        # character.
        part = substr(s, rs + 1, rl - 1);
        while (rl > 1 && match(part, regexp) != 0) {
            rs += RSTART;
            rl = RLENGTH;
            part = substr(s, rs + 1, rl - 1);
        }
    }

    if (rl < 1)
        return 0;
    pos["start"] = rs;
    pos["len"] = rl;
    return 1;
}

# Split line l into alternating, non-overlapping nonmatch and match
# strings using regexp and criteria k (0 maximal, 1 trim end,
# 2 trim start, 3 minimal).
#
# On return:
#   n[j], m[j]  for j = 1 .. n[0]-1 hold the text before the j-th match
#               and the j-th match itself (the first nonmatch may be "")
#   n[n[0]]     holds whatever follows the last match (possibly "")
#   m[n[0]]     is ""
#   n[0], m[0]  both hold the number of entries
# The function itself always returns 0.
# Every match, and a non-empty trailing remainder, is also traced to
# standard output.
# Names after "regexp" in the parameter list are locals; callers pass
# only the first five arguments.
function mmatch(n, m, l, k, regexp,    i, l1, pos, rs, rl) {
    i = 1;
    l1 = l;
    while (_nextmatch(l1, k, regexp, pos)) {
        rs = pos["start"];
        rl = pos["len"];
        # AWK strings are 1-based: the text before the match is
        # characters 1 .. rs-1.
        n[i] = substr(l1, 1, rs - 1);
        m[i] = substr(l1, rs, rl);
        l1 = substr(l1, rs + rl);
        printf "%d %d: n)%s\tm)%s\n", FNR, i, n[i], m[i]
        i++;
    }
    # Always write the final entry so that a caller reusing n and m
    # across lines never sees a value left over from an earlier call.
    n[i] = l1;
    m[i] = "";
    if (length(l1)) {
        printf "%d %d: n)%s\tm)%s \n", FNR, i, n[i], m[i]
    }
    m[0] = i;
    n[0] = i;
    return (0);
}

# Same partitioning as mmatch(), but records indices into l instead of
# copying strings, so the line can be cut into the text before, of, and
# after the j-th match.
#
# On return, for j = 1 .. n[0]-1:
#   n[j]  1-based start of the j-th match in l
#   m[j]  length of the j-th match
#   j-th match            substr(l, n[j], m[j])
#   text before 1st match substr(l, 1, n[1] - 1)
#   text between matches  substr(l, n[j-1] + m[j-1], n[j] - (n[j-1] + m[j-1]))  (j > 1)
# and for the final entry:
#   n[n[0]] = length(l) + 1,  m[n[0]] = 0
# n[0] and m[0] both hold the number of entries.  The function itself
# always returns 0.
# n[0] and m[0] are used as scratch (1 and 0) while scanning so that the
# recurrence below also works for the first match.
function imatch(n, m, l, k, regexp,    i, l1, pos) {
    i = 1;
    l1 = l;
    n[0] = 1;
    m[0] = 0;
    while (_nextmatch(l1, k, regexp, pos)) {
        # l1 begins at absolute position n[i-1] + m[i-1] of l.
        n[i] = n[i-1] + m[i-1] + pos["start"] - 1;
        m[i] = pos["len"];
        l1 = substr(l1, pos["start"] + pos["len"]);
        i++;
    }
    # Final entry: one past the end of the line, with zero length.
    n[i] = n[i-1] + m[i-1] + length(l1);
    m[i] = 0;
    m[0] = i;
    n[0] = i;
    return (0);
}

# Process the file: for every input line, print the enclosed part of
# each minimal match.
{
    i = 1;
    l = $0;
    k = 3;

    # The pattern must be passed as a string such as "m.*n", not as a
    # regexp literal such as /m.*n/.
    #
    # NOTE(review): both patterns below are empty strings, so no match
    # is ever reported and nothing is printed.  The +9 / -11 offsets
    # strip 9 leading and 2 trailing characters from each match, which
    # suggests the real patterns were lost upstream - restore them and
    # confirm the offsets.

    # Lower case
    i = imatch(n, m, l, k, "");
    for (i1 = 1; i1 < m[0] && m[i1] > 0; i1++) {
        # Print enclosed matches
        printf "%s %d %d: %s\n", FILENAME, FNR, i1, substr(l, n[i1] + 9, m[i1] - 11)
    }

    # Upper case
    i = imatch(n, m, l, k, "");
    for (i1 = 1; i1 < m[0] && m[i1] > 0; i1++) {
        # Print enclosed matches
        printf "%s %d %d: %s\n", FILENAME, FNR, i1, substr(l, n[i1] + 9, m[i1] - 11)
    }
}