|
|
# Read the named input files (or standard input when none are given),
# tag datakit names and att.com domain names in "uucp" and "hosts"
# records, prefix every record with a numeric id, and hand the result
# to merge.sh and rmdups.sh.
echo "merge on datakit and domain names" 1>&2
# "$@" (rather than unquoted $*) keeps each file name intact even when
# it contains whitespace or glob characters.
cat "$@" | awk '
#
# unique is an array that associates a numeric id with each
# symbolic one; nextid is the next numeric id to hand out.
#
BEGIN {
    nextid = 1
}

#
# Return the numeric id for the symbolic id key.  If key is already
# in unique its recorded id is returned; otherwise a "UNIQ," record
# announcing the pairing is printed and a fresh id is allocated.
# The caller is responsible for storing the result in unique.
#
function idfor(key,    fresh) {
    if (key in unique)
        return unique[key]
    print nextid " UNIQ," key
    fresh = nextid
    nextid = nextid + 1
    return fresh
}

#
# Invert the order of the dot-separated components of name and join
# them with "/", e.g. a.b.c becomes c/b/a.
#
function domkey(name,    part, n, j, key) {
    n = split(name, part, ".")
    key = part[n]
    for (j = n - 1; j >= 1; j--)
        key = key "/" part[j]
    return key
}

#
# add com/att to the front of a dk string to make a unique id
#
$1 == "uucp" {
    # The first field containing a "/" decides the id of the record.
    id = 0
    for (i = 2; i <= NF; i++)
        if (index($i, "/") > 0) {
            id = idfor("com/att/" $i)
            break
        }
    # No dk name on this record: give it an id of its own, with no
    # UNIQ record since there is no symbolic id to announce.
    if (id == 0) {
        id = nextid
        nextid = nextid + 1
    }
    # Bind every dk name on the record to that id and tag it "DK,";
    # all other fields are copied through unchanged.
    line = ""
    for (i = 2; i <= NF; i++)
        if (index($i, "/") > 0) {
            unique["com/att/" $i] = id
            line = line " DK," $i
        } else
            line = line " " $i
    print id " " line
    next
}

#
# invert order of domain components to make a unique id
#
$1 == "hosts" {
    # The first att.com name decides the id of the record.  The dots
    # are escaped so that they match only a literal "." and not any
    # character.
    id = 0
    for (i = 2; i <= NF; i++)
        if ($i ~ /\.att\.com/) {
            id = idfor(domkey($i))
            break
        }
    # No att.com name on this record: give it an id of its own.
    if (id == 0) {
        id = nextid
        nextid = nextid + 1
    }
    # Bind every att.com name on the record to that id and tag it
    # "DOMAIN,"; all other fields are copied through unchanged.
    line = ""
    for (i = 2; i <= NF; i++)
        if ($i ~ /\.att\.com/) {
            unique[domkey($i)] = id
            line = line " DOMAIN," $i
        } else
            line = line " " $i
    print id " " line
    next
}
' | merge.sh | rmdups.sh
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.