|
|
# Tag datakit and domain names in uucp/hosts records and give every
# record a numeric id, so that records naming the same machine share
# one id.
#
# Input:  the files named as arguments (standard input when there are
#         none).  Only records whose first field is "uucp" or "hosts"
#         are used; every other record is dropped.
# Output: "<id>  <tagged fields>" records, plus "<id> UNIQ,<key>"
#         records announcing each newly assigned key, piped through
#         merge.sh and rmdups.sh (not visible here; presumably they
#         merge records by id and remove duplicates -- confirm).
echo "merge on datakit and domain names" 1>&2
# "$@" rather than $* so file names with blanks or glob characters
# reach cat intact.
cat "$@" | awk '
	#
	# unique maps each symbolic id (a "/"-separated key such as
	# com/att/...) to the numeric id assigned to it
	#
	BEGIN {
		nextid = 1
	}

	#
	# reverse the dot-separated components of a domain name and
	# join them with "/":  a.b.att.com -> com/att/b/a
	#
	function domkey(name,    parts, n, j, key) {
		n = split(name, parts, ".")
		key = parts[n]
		for (j = n - 1; j >= 1; j--)
			key = key "/" parts[j]
		return key
	}

	#
	# return the numeric id already bound to key; otherwise emit a
	# UNIQ record for key and hand out the next free id.  The caller
	# is responsible for storing the binding in unique.
	#
	function lookup(key) {
		if (key in unique)
			return unique[key]
		print nextid " UNIQ," key
		return nextid++
	}

	#
	# add com/att to the front of a dk string to make a unique id
	#
	$1 == "uucp" {
		id = 0
		# the first datakit name (one containing "/") decides the id
		for (i = 2; i <= NF; i++)
			if (index($i, "/") > 0) {
				id = lookup("com/att/" $i)
				break
			}
		# no datakit name at all: still give the record its own id
		if (id == 0)
			id = nextid++
		line = ""
		# bind every datakit name on the record to that id and tag it
		for (i = 2; i <= NF; i++)
			if (index($i, "/") > 0) {
				unique["com/att/" $i] = id
				line = line " DK," $i
			} else
				line = line " " $i
		print id " " line
		next
	}

	#
	# invert order of domain components to make a unique id
	#
	$1 == "hosts" {
		id = 0
		# the dots are escaped and the pattern anchored so that only
		# names really ending in .att.com are treated as domain names
		# (an unescaped "." would match any character)
		for (i = 2; i <= NF; i++)
			if ($i ~ /\.att\.com$/) {
				id = lookup(domkey($i))
				break
			}
		# no att.com name at all: still give the record its own id
		if (id == 0)
			id = nextid++
		line = ""
		# bind every att.com name on the record to that id and tag it
		for (i = 2; i <= NF; i++)
			if ($i ~ /\.att\.com$/) {
				unique[domkey($i)] = id
				line = line " DOMAIN," $i
			} else
				line = line " " $i
		print id " " line
		next
	}
' | merge.sh | rmdups.sh
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.