lexh.c (2410B)
1 #include <signal.h> 2 #include <stdlib.h> 3 #include <stdio.h> 4 #include <string.h> 5 #include <limits.h> 6 7 #ifndef SIGHUP 8 #define SIGHUP 0 9 #endif 10 11 extern unsigned genhash(char *); 12 13 static sig_atomic_t signalled; 14 static unsigned long M, K, S; 15 16 static unsigned 17 hash(char *s) 18 { 19 unsigned h; 20 21 h = genhash(s); 22 23 return (unsigned short) h; 24 } 25 26 static int 27 foundmap(unsigned *th, int ntok) 28 { 29 int i; 30 unsigned h; 31 char bmap[USHRT_MAX]; 32 33 for (K = 1; (unsigned short) (K+1) != 0; K += 2) { 34 if (signalled) { 35 signalled = 0; 36 fprintf(stderr, "M=%lu,K=%lu,S=%lu\n", M, K, S); 37 } 38 memset(bmap, 0, sizeof(bmap)); 39 40 for (i = 0; i < ntok; i++) { 41 h = (th[i]*K >> S) & M; 42 if (bmap[h]) 43 break; 44 bmap[h] = 1; 45 } 46 47 if (i == ntok) 48 return 1; 49 } 50 51 return 0; 52 } 53 54 static void 55 phash(char *toks[], int ntok) 56 { 57 int i, j, nbits; 58 unsigned *th, h; 59 60 if ((th = calloc(ntok, sizeof(*th))) == NULL) { 61 perror("hash"); 62 exit(EXIT_FAILURE); 63 } 64 65 for (i = 0; i < ntok; i++) { 66 h = hash(toks[i]); 67 for (j = 0; j < i && th[j] != h; j++) 68 ; 69 70 if (i != j) { 71 fprintf(stderr, 72 "hash: collision %s-%s\n", 73 toks[i], 74 toks[j]); 75 exit(EXIT_FAILURE); 76 } 77 th[i] = h; 78 } 79 80 for (nbits = 1; 1<<nbits < ntok; ++nbits) 81 ; 82 83 for (i = nbits+1; i < 16; i++) { 84 M = ~((unsigned) -1 << i); 85 86 for (j = nbits; j < 32; j++) { 87 S = 32 - j; 88 89 if (foundmap(th, ntok)) 90 goto found; 91 } 92 } 93 94 fputs("hash: not possible perfect hash\n", stderr); 95 exit(EXIT_FAILURE); 96 97 found: 98 printf("unsigned long K=%lu, M=%lu, S=%lu;\n", K, M, S); 99 100 printf("short hashmap[%d] = {\n", 1<<i); 101 for (i = 0; i < ntok; i++) 102 printf("\t[%ld] = %d,\n", (th[i]*K >> S) & M, i+1); 103 puts("};"); 104 } 105 106 static void 107 sighandler(int num) 108 { 109 signalled = 1; 110 signal(SIGHUP, sighandler); 111 } 112 113 int 114 main() 115 { 116 int nr, n; 117 char line[BUFSIZ], **tokens, *tok; 118 119 nr = 0; 120 tokens = NULL; 121 for (;;) { 122 if (!fgets(line, sizeof(line), stdin)) 123 break; 124 n = strlen(line); 125 if (n == 0) 126 continue; 127 if (line[n-1] != '\n') { 128 fputs("error: line truncated\n", stderr); 129 exit(EXIT_FAILURE); 130 } 131 line[n-1] = '\0'; 132 133 tok = malloc(n); 134 tokens = realloc(tokens, (nr+1) * sizeof(*tokens)); 135 if (!tokens || !tok) { 136 perror("hash"); 137 exit(EXIT_FAILURE); 138 } 139 tokens[nr++] = memcpy(tok, line, n); 140 } 141 142 if (ferror(stdin)) { 143 perror("hash"); 144 exit(EXIT_FAILURE); 145 } 146 147 signal(SIGHUP, sighandler); 148 149 if (nr != 0) 150 phash(tokens, nr); 151 152 return 0; 153 }