/*
 * Requires a C99ish compiler. gcc 3 works. gcc 2.95 works. Earlier
 * versions might.
 *
 * The arrays below use designated initializers to make it very explicit
 * which elements are being set to what. The standard says that any element
 * without an initializer in these arrays starts out as it would if it were
 * static - in other words, zeroed out. That's usually what we want.
 *
 * However, since most people compiling Penn probably aren't going to be
 * using a C99 compiler for some time to come, this program will translate
 * from the DI form to the fully-initialized form that all C and C++ compilers
 * understand.
 *
 * Example Usage:
 * % cd pennmush
 * % gcc -o gentables utils/gentables.c
 * % ./gentables > src/tables.c
 * % make
 */
| 21 | |
|---|
| 22 | #include <stdio.h> |
|---|
| 23 | #include <limits.h> |
|---|
| 24 | #include <stdlib.h> |
|---|
| 25 | |
|---|
/* q-register lookup: each register character maps to its offset plus one.
 * Characters that are not listed keep the implicit value 0. Digits occupy
 * 1-10 and letters 11-36, with both cases of a letter sharing one value. */
char q_offsets[UCHAR_MAX + 1] = {
  /* Digits */
  ['0'] = 1,
  ['1'] = 2,
  ['2'] = 3,
  ['3'] = 4,
  ['4'] = 5,
  ['5'] = 6,
  ['6'] = 7,
  ['7'] = 8,
  ['8'] = 9,
  ['9'] = 10,

  /* Letters (lower case first, then the matching upper case) */
  ['a'] = 11, ['A'] = 11,   ['b'] = 12, ['B'] = 12,
  ['c'] = 13, ['C'] = 13,   ['d'] = 14, ['D'] = 14,
  ['e'] = 15, ['E'] = 15,   ['f'] = 16, ['F'] = 16,
  ['g'] = 17, ['G'] = 17,   ['h'] = 18, ['H'] = 18,
  ['i'] = 19, ['I'] = 19,   ['j'] = 20, ['J'] = 20,
  ['k'] = 21, ['K'] = 21,   ['l'] = 22, ['L'] = 22,
  ['m'] = 23, ['M'] = 23,   ['n'] = 24, ['N'] = 24,
  ['o'] = 25, ['O'] = 25,   ['p'] = 26, ['P'] = 26,
  ['q'] = 27, ['Q'] = 27,   ['r'] = 28, ['R'] = 28,
  ['s'] = 29, ['S'] = 29,   ['t'] = 30, ['T'] = 30,
  ['u'] = 31, ['U'] = 31,   ['v'] = 32, ['V'] = 32,
  ['w'] = 33, ['W'] = 33,   ['x'] = 34, ['X'] = 34,
  ['y'] = 35, ['Y'] = 35,   ['z'] = 36, ['Z'] = 36
};
| 57 | |
|---|
/* Characters the parser looks for. A non-zero entry flags the character;
 * everything not listed stays 0. */
char parse_interesting[UCHAR_MAX + 1] = {
  /* String terminator and the escape control character (ESC, 0x1B) */
  ['\0'] = 1, [0x1B] = 1,

  /* Opening and closing brackets */
  ['{'] = 1, ['}'] = 1,
  ['['] = 1, [']'] = 1,
  ['('] = 1, [')'] = 1,

  /* Remaining punctuation and the space character */
  ['%'] = 1, ['\\'] = 1, [' '] = 1, [','] = 1,
  [';'] = 1, ['='] = 1,  ['$'] = 1
};
| 76 | |
|---|
/* Characters allowed in attribute names: digits, upper-case letters and
 * the punctuation listed below. Anything not listed (lower-case letters
 * included) keeps the implicit value 0. */
char attribute_names[UCHAR_MAX + 1] = {
  /* Digits */
  ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, ['5'] = 1,
  ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,

  /* Upper-case letters */
  ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, ['F'] = 1,
  ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, ['K'] = 1, ['L'] = 1,
  ['M'] = 1, ['N'] = 1, ['O'] = 1, ['P'] = 1, ['Q'] = 1, ['R'] = 1,
  ['S'] = 1, ['T'] = 1, ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1,
  ['Y'] = 1, ['Z'] = 1,

  /* Punctuation */
  ['_'] = 1, ['#'] = 1, ['@'] = 1, ['$'] = 1, ['!'] = 1, ['~'] = 1,
  ['|'] = 1, [';'] = 1, ['`'] = 1, ['"'] = 1, ['\''] = 1, ['&'] = 1,
  ['*'] = 1, ['-'] = 1, ['+'] = 1, ['='] = 1, ['?'] = 1, ['/'] = 1,
  ['.'] = 1, ['>'] = 1, ['<'] = 1, [','] = 1
};
| 92 | |
|---|
/* Conversion letters accepted in time format strings: the C89 strftime()
 * codes. '$' is accepted as well.
 * NOTE(review): '$' presumably stands in for strftime's literal "%%" escape;
 * confirm against the code that consumes this table. */
char valid_timefmt_codes[UCHAR_MAX + 1] = {
  /* Lower-case codes */
  ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['j'] = 1,
  ['m'] = 1, ['p'] = 1, ['w'] = 1, ['x'] = 1, ['y'] = 1,

  /* Upper-case codes */
  ['A'] = 1, ['B'] = 1, ['H'] = 1, ['I'] = 1, ['M'] = 1, ['S'] = 1,
  ['U'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1, ['Z'] = 1,

  /* Non-letter code */
  ['$'] = 1
};
| 101 | |
|---|
/* Characters given special treatment by escape() and secure(). Listed
 * characters are 1; all others keep the implicit value 0. */
char escaped_chars[UCHAR_MAX + 1] = {
  /* Bracket pairs */
  ['('] = 1, [')'] = 1,
  ['['] = 1, [']'] = 1,
  ['{'] = 1, ['}'] = 1,

  /* Other special characters */
  ['$'] = 1, ['^'] = 1, ['%'] = 1,
  [','] = 1, [';'] = 1, ['\\'] = 1
};
| 108 | |
|---|
| 109 | |
|---|
/* Code characters recognised in ansi markup. Listed characters are 1;
 * all others keep the implicit value 0. */
char ansi_codes[UCHAR_MAX + 1] = {
  /* Lower-case codes */
  ['a'] = 1, ['b'] = 1, ['c'] = 1, ['f'] = 1, ['g'] = 1, ['h'] = 1,
  ['i'] = 1, ['m'] = 1, ['n'] = 1, ['r'] = 1, ['u'] = 1, ['w'] = 1,
  ['x'] = 1, ['y'] = 1,

  /* Upper-case codes */
  ['B'] = 1, ['C'] = 1, ['G'] = 1, ['M'] = 1,
  ['R'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1,

  /* Non-letter code */
  ['/'] = 1
};
| 119 | |
|---|
/** Accented characters
 *
 * The table is for ISO 8859-1 character set.
 * It should be easy to modify it for other ISO 8859-X sets, or completely
 * different families.
 *
 * Each entry is indexed by character code and pairs a plain-ASCII
 * replacement with the HTML entity reference for that character. Entries
 * that are not listed are left as {NULL, NULL}.
 *
 * Both strings are written by print_entity_table() between double quotes
 * into generated C source, so they must already be escaped for a C string
 * literal. That is why the '"' and '\n' rows carry an extra backslash.
 */
typedef struct {
  const char *base;   /**< Base character */
  const char *entity; /**< HTML entity */
} accent_info;

accent_info entity_table[UCHAR_MAX + 1] = {
  // Assorted characters
  ['<'] = {"<", "&lt;"},
  ['>'] = {">", "&gt;"},
  ['&'] = {"&", "&amp;"},
  ['"'] = {"\\\"", "&quot;"},
  ['\n'] = {"\\n", "<br>\\n"},
  // << and >> quotes
  [171] = {"<<", "&laquo;"},
  [187] = {">>", "&raquo;"},
  // Upside-down punctuation
  [161] = {"!", "&iexcl;"},
  [191] = {"?", "&iquest;"},
  // szlig
  [223] = {"s", "&szlig;"},
  // thorn
  [222] = {"P", "&THORN;"},
  [254] = {"p", "&thorn;"},
  // eth
  [208] = {"D", "&ETH;"},
  [240] = {"o", "&eth;"},
  // Special symbols
  [169] = {"(c)", "&copy;"},
  [174] = {"(r)", "&reg;"},
  [188] = {"1/4", "&frac14;"},
  [189] = {"1/2", "&frac12;"},
  [190] = {"3/4", "&frac34;"},

  // AE ligatures
  [198] = {"AE", "&AElig;"},
  [230] = {"ae", "&aelig;"},

  // Accented a's
  [192] = {"A", "&Agrave;"},
  [193] = {"A", "&Aacute;"},
  [194] = {"A", "&Acirc;"},
  [195] = {"A", "&Atilde;"},
  [196] = {"A", "&Auml;"},
  [197] = {"A", "&Aring;"},
  [224] = {"a", "&agrave;"},
  [225] = {"a", "&aacute;"},
  [226] = {"a", "&acirc;"},
  [227] = {"a", "&atilde;"},
  [228] = {"a", "&auml;"},
  [229] = {"a", "&aring;"},

  // Accented c's
  [199] = {"C", "&Ccedil;"},
  [231] = {"c", "&ccedil;"},

  // Accented e's
  [200] = {"E", "&Egrave;"},
  [201] = {"E", "&Eacute;"},
  [202] = {"E", "&Ecirc;"},
  [203] = {"E", "&Euml;"},
  [232] = {"e", "&egrave;"},
  [233] = {"e", "&eacute;"},
  [234] = {"e", "&ecirc;"},
  [235] = {"e", "&euml;"},

  // Accented i's
  [204] = {"I", "&Igrave;"},
  [205] = {"I", "&Iacute;"},
  [206] = {"I", "&Icirc;"},
  [207] = {"I", "&Iuml;"},
  [236] = {"i", "&igrave;"},
  [237] = {"i", "&iacute;"},
  [238] = {"i", "&icirc;"},
  [239] = {"i", "&iuml;"},

  // Accented n's
  [209] = {"N", "&Ntilde;"},
  [241] = {"n", "&ntilde;"},

  // Accented o's
  [210] = {"O", "&Ograve;"},
  [211] = {"O", "&Oacute;"},
  [212] = {"O", "&Ocirc;"},
  [213] = {"O", "&Otilde;"},
  [214] = {"O", "&Ouml;"},
  [242] = {"o", "&ograve;"},
  [243] = {"o", "&oacute;"},
  [244] = {"o", "&ocirc;"},
  [245] = {"o", "&otilde;"},
  [246] = {"o", "&ouml;"},

  // Accented u's
  [217] = {"U", "&Ugrave;"},
  [218] = {"U", "&Uacute;"},
  [219] = {"U", "&Ucirc;"},
  [220] = {"U", "&Uuml;"},
  [249] = {"u", "&ugrave;"},
  [250] = {"u", "&uacute;"},
  [251] = {"u", "&ucirc;"},
  [252] = {"u", "&uuml;"},

  // Accented y's
  [221] = {"Y", "&Yacute;"},
  [253] = {"y", "&yacute;"},
  [255] = {"y", "&yuml;"},
};
| 231 | |
|---|
/* Emit a fully-initialized C array definition on stdout for a table of
 * chars that are treated as small numeric values.
 *
 * type  - element type to write in the generated declaration
 * name  - identifier to give the generated array
 * table - UCHAR_MAX + 1 source values
 * delta - amount added to every value before it is printed
 *
 * Values are printed 16 to a line, each right-aligned in a 3-column field.
 */
void print_table_bool(const char *type, const char *name,
                      char table[], int delta) {
  const int count = UCHAR_MAX + 1;
  int idx;

  printf("%s %s[%d] = {\n", type, name, count);

  for (idx = 0; idx < count; idx++) {
    const int is_last = (idx == count - 1);
    const int ends_row = ((idx + 1) % 16 == 0);

    printf("%3d", table[idx] + delta);
    /* Every value except the final one is followed by a separator. */
    if (!is_last)
      putchar(',');
    if (ends_row)
      putchar('\n');
  }

  fputs("};\n\n", stdout);
}
| 246 | |
|---|
| 247 | void print_entity_table(const char *name, |
|---|
| 248 | const accent_info table[]) { |
|---|
| 249 | int n; |
|---|
| 250 | puts("typedef struct {"); |
|---|
| 251 | puts("const char *base;"); |
|---|
| 252 | puts("const char *entity;"); |
|---|
| 253 | puts("} accent_info;"); |
|---|
| 254 | printf("accent_info %s[%d] = {\n", name, UCHAR_MAX + 1); |
|---|
| 255 | for (n = 0; n < UCHAR_MAX + 1; n++) { |
|---|
| 256 | if (table[n].entity) |
|---|
| 257 | printf("{\"%s\", \"%s\"}", table[n].base, table[n].entity); |
|---|
| 258 | else |
|---|
| 259 | fputs("{NULL, NULL}", stdout); |
|---|
| 260 | if (n < UCHAR_MAX) |
|---|
| 261 | putchar(','); |
|---|
| 262 | putchar('\n'); |
|---|
| 263 | } |
|---|
| 264 | fputs("};\n\n", stdout); |
|---|
| 265 | } |
|---|
| 266 | |
|---|
| 267 | |
|---|
| 268 | int main(int argc, char *argv[]) { |
|---|
| 269 | printf("/* This file was generated by running %s compiled from\n" |
|---|
| 270 | " * %s. Edit that file, not this one, when making changes. */\n" |
|---|
| 271 | "#include <stdlib.h>\n\n", |
|---|
| 272 | argv[0], __FILE__); |
|---|
| 273 | print_table_bool("signed char", "qreg_indexes", q_offsets, -1); |
|---|
| 274 | print_table_bool("char", "active_table", parse_interesting, 0); |
|---|
| 275 | print_table_bool("char", "atr_name_table", attribute_names, 0); |
|---|
| 276 | print_table_bool("char", "valid_timefmt_codes", valid_timefmt_codes, 0); |
|---|
| 277 | print_table_bool("char", "escaped_chars", escaped_chars, 0); |
|---|
| 278 | print_table_bool("char", "valid_ansi_codes", ansi_codes, 0); |
|---|
| 279 | print_entity_table("accent_table", entity_table); |
|---|
| 280 | return EXIT_SUCCESS; |
|---|
| 281 | } |
|---|