PennMUSH Community

root/1.8.3/trunk/src/boolexp.c

Revision 1032, 53.2 kB (checked in by shawnw, 1 year ago)

Merged 1.8.3p4 into trunk

Line 
1 /**
2  * \file boolexp.c
3  *
4  * \brief Boolean expression parser.
5  *
6  * This code implements a parser for boolean expressions of the form
7  * used in locks. Summary of parsing rules, lowest to highest precedence:
8  * \verbatim
9  * E -> T; E -> T | E                   (or)
10  * T -> F; T -> F & T                   (and)
11  * F -> !F;F -> A                       (not)
12  * A -> @L; A -> I                      (indirect)
13  * I -> =Identifier ; I -> C            (equality)
14  * C -> +Identifier ; C -> O            (carry)
15  * O -> $Identifier ; O -> L            (owner)
16  * L -> (E); L -> eval/attr/flag lock   (parens, special atoms)
17  * L -> E, L is an object name or dbref or #t* or #f*   (simple atoms)
18  * \endverbatim
19  *
20  * Previously, the boolexp code just used a parse tree of the
21  * boolexp. Now, it turns the parse tree into bytecode that can be
22  * stored in the chunk manager. It probably also evaluates faster, but
23  * no profiling has been done to support this claim. It certainly
24  * involves less non-tail recursion and better cache behavior.
25  *
26  * It's a three-stage process. First, the lock string is turned into a
27  * parse tree. Second, the tree is walked and "assembler" instructions
28  * are generated, including labels for jumps. Third, the "assembly" is
29  * stepped through and bytecode emitted, with labeled jumps replaced
30  * by distances that are offsets from the start of the
31  * bytecode. Pretty standard stuff.
32  *
33  * Each bytecode instruction is 5 bytes long (1 byte opcode + 4 byte
34  * int argument), and the minimum number of instructions in a compiled
35  * boolexp is 2, for a minimum size of 10 bytes. Compare this to the
36  * size of one parse-tree node, 16 bytes. Savings appear to be
37  * substantial, especially with complex locks with lots of ors or
38  * ands.
39  *
40  * Many lock keys have string arguments. The strings are standard
41  * 0-terminated C strings stored in a section of the same string as
42  * the bytecode instructions, starting right after the last
43  * instruction. They're accessed by offset from the start of the
44  * bytecode string. If the same string appears multiple times in the
45  * lock, only one copy is actually present in the string section.
46  *
47  * The VM for the bytecode is a simple register-based one.  The
48  * registers are R, the result register, set by test instructions and
49  * a few others, and S, the string register, which holds the extra
50  * string in the few tests that need two (A:B, A/B). There are
51  * instructions for each lock key type. There's a few extra ones to
52  * make decompiling back into a string dead easy. Nothing very
53  * complex.
54  *
55  * Future directions?
56  * \verbatim
57  * [Development] Raevnos is tempted in passing to re-write the boolexp parser in
58  * lex and yacc.
59  * [Development] Brazil laughs.
60  * [Development] Brazil says, "That might not be a bad idea."
61  * [Development] Raevnos has redone everything else in boolexp.c in Penn, so why
62  * not? :)
63  * [Development] Raevnos says, "Using the justification that it's a lot easier to
64  * expand the langage by adding new key types that way."
65  * \endverbatim
66  *
67  * So now you know who to blame if that particular item appears in a
68  * changelog for Penn or MUX.
69  *
70  * On a more serious note, a) #1234 is equivalent to b)
71  * =#1234|+#1234. Detecting b and turning it into a, or vis versa,
72  * would be easy to do. a is a common key, but turning it into b gets
73  * rid of a test instruction in the VM, at the cost of more
74  * instructions in generated lock bytecode. CISC or RISC? :) It's also
75  * easy to turn !!foo into foo, but nobody makes locks like that. Same
76  * with !#true and !#false. Of possibly more interest is rearranging
77  * the logic when ands, ors and nots are being used together. For
78  * example, !a|!b can become !(a&b).
79  *
80  * The only optimization done right now is thread jumping: If a jump
81  * would move the program counter to another jump operation, it instead
82  * goes to that jump's destination.
83  *
84  * There's more useful room for improvement in the lock
85  * @warnings. Checking things like flag and power keys for valid flags
86  * comes to mind.
87  *
88  */
89
90 #include "copyrite.h"
91 #include "config.h"
92
93 #include <ctype.h>
94 #include <string.h>
95 #ifdef HAVE_STDINT_H
96 #include <stdint.h>
97 #endif
98 #include "conf.h"
99 #include "dbdefs.h"
100 #include "mushdb.h"
101 #include "match.h"
102 #include "externs.h"
103 #include "lock.h"
104 #include "parse.h"
105 #include "attrib.h"
106 #include "flags.h"
107 #include "log.h"
108 #include "extchat.h"
109 #include "strtree.h"
110 #include "mymalloc.h"
111 #include "confmagic.h"
112
113 #ifdef WIN32
114 #pragma warning( disable : 4761)        /* disable warning re conversion */
115 #endif
116
117 /* #define DEBUG_BYTECODE */
118
119 /** Parse tree node types */
120 typedef enum boolexp_type {
121   BOOLEXP_AND, /**< A&B */
122   BOOLEXP_OR, /**< A|B */
123   BOOLEXP_NOT, /**< !A */
124   BOOLEXP_CONST, /**< A */
125   BOOLEXP_ATR, /**< A:B */
126   BOOLEXP_IND, /**< @A/B */
127   BOOLEXP_CARRY, /**< +A */
128   BOOLEXP_IS, /**< =A */
129   BOOLEXP_OWNER, /**< $A */
130   BOOLEXP_EVAL, /**< A/B */
131   BOOLEXP_FLAG, /**< A^B */
132   BOOLEXP_BOOL /**< #true, #false */
133 } boolexp_type;
134
135 /** An attribute lock specification for the parse tree.
136  * This structure is a piece of a boolexp that's used to store
137  * attribute locks (CANDO:1), eval locks (CANDO/1), and flag locks
138  * FLAG^WIZARD.
139  */
140 struct boolatr {
141   const char *name;             /**< Name of attribute, flag, etc. to test */
142   char text[BUFFER_LEN];        /**< Value to test against */
143 };
144
145 /** A boolean expression parse tree node.
146  * Boolean expressions are most widely used in locks. This structure
147  * is a general representation of the possible boolean expressions
148  * that can be specified in MUSHcode. It's used internally by the lock
149  * compiler.
150  */
151 struct boolexp_node {
152   /** Type of expression.
153    * The type of expressio is one of the boolexp_type's, such as
154    * and, or, not, constant, attribute, indirect, carry, is,
155    * owner, eval, flag, etc.
156    */
157   boolexp_type type;
158   dbref thing;                  /**< An object, or a boolean val */
159   /** The expression itself.
160    * This union comprises the various possible types of data we
161    * might need to represent any of the expression types.
162    */
163   union {
164     /** And and or locks: combinations of boolexps.
165      * This union member is used with and and or locks.
166      */
167     struct {
168       struct boolexp_node *a;   /**< One boolean expression */
169       struct boolexp_node *b;   /**< Another boolean expression */
170     } sub;
171     struct boolexp_node *n;             /**< Not locks: boolean expression to negate */
172     struct boolatr *atr_lock;   /**< Atr, eval and flag locks */
173     const char *ind_lock;       /**< Indirect locks */
174   } data;
175 };
176
177
178 /** The opcodes supported by the boolexp virtual machine. */
179 typedef enum bvm_opcode {
180   OP_JMPT, /**< Jump to ARG if R is true */
181   OP_JMPF, /**< Jump to ARG if R is false */
182   OP_TCONST, /**< Tests plain #ARG */
183   OP_TATR, /**< Tests S:ARG */
184   OP_TIND, /**< Tests @#ARG/S */
185   OP_TCARRY, /**< Tests +#ARG */
186   OP_TIS, /**< Tests =#ARG */
187   OP_TOWNER, /**< Tests $#ARG */
188   OP_TEVAL, /**< Tests S/ARG */
189   OP_TFLAG, /**< Tests FLAG^ARG */
190   OP_TTYPE, /**< Tests TYPE^ARG */
191   OP_TPOWER, /**< Tests POWER^ARG */
192   OP_TCHANNEL, /**< Tests CHANNEL^ARG */
193   OP_TIP, /**< Tests IP^ARG */
194   OP_THOSTNAME, /**< Tests HOSTNAME^ARG */
195   OP_TOBJID, /**< Tests OBJID^ARG */
196   OP_TDBREFLIST,        /**< Tests DBERFLIST^ARG */
197   OP_LOADS, /**< Load ARG into S */
198   OP_LOADR, /**< Load ARG into R */
199   OP_NEGR,  /**< Negate R */
200   OP_PAREN, /**< ARG = 0 for a (, ARG = 1 for a ) in decompiling */
201   OP_LABEL, /**< A label. Not actually in compiled bytecode */
202   OP_RET    /**< Stop evaluating bytecode */
203 } bvm_opcode;
204
205 /** The size of a single bytecode instruction. Probably 5 bytes
206  * everywhere. */
207 #define INSN_LEN (1 + sizeof(int))
208
209 /** Information describing one VM instruction or label in the
210  * intermediate "assembly" generated from a parse tree. The nodes are
211  * part of a linked list.  */
212 struct bvm_asmnode {
213   bvm_opcode op; /**< The opcode */
214   int arg; /**< The arg value, or a label or string number */
215   struct bvm_asmnode *next; /**< Pointer to the next node */
216 };
217
218 /** Information describing a string to emit in the string section of
219  * the bytecode. The nodes are part of a linked list.  */
220 struct bvm_strnode {
221   char *s; /**< The string */
222   size_t len; /**< Its length */
223   struct bvm_strnode *next; /**< Pointer to the next node */
224 };
225
226 /** A struct describing the complete assembly information needed to
227  * generate bytecode */
228 struct bvm_asm {
229   struct bvm_asmnode *head; /**< The start of the list of assembly instructions */
230   struct bvm_asmnode *tail; /**< The end of the list */
231   struct bvm_strnode *shead; /**< The start of the list of strings */
232   struct bvm_strnode *stail; /**< The end of the list */
233   int label; /**< The current label id to use */
234   size_t strcount;      /**< The number of nodes in the string list */
235 };
236
237 /** The flag lock key (A^B) only allows a few values for A. This
238  * struct and the the following table define the allowable ones. When
239  * adding a new type here, a matching new bytecode instruction should
240  * be added. */
241 struct flag_lock_types {
242   const char *name; /**< The value of A */
243   bvm_opcode op;  /**< The associated opcode */
244 };
245
246 /** What's allowed on the left-hand-side of LHS^RHS lock keys */
247 static struct flag_lock_types flag_locks[] = {
248   {"FLAG", OP_TFLAG},
249   {"POWER", OP_TPOWER},
250   {"TYPE", OP_TTYPE},
251   {"CHANNEL", OP_TCHANNEL},
252   {"OBJID", OP_TOBJID},
253   {"IP", OP_TIP},
254   {"HOSTNAME", OP_THOSTNAME},
255   {"DBREFLIST", OP_TDBREFLIST},
256   {NULL, OP_RET}
257 };
258
259 static uint8_t *
260 safe_get_bytecode(boolexp b)
261   __attribute_malloc__;
262     static uint8_t *get_bytecode(boolexp b, uint16_t * storelen);
263     static struct boolexp_node *alloc_bool(void) __attribute_malloc__;
264     static struct boolatr *alloc_atr(const char *name,
265                                      const char *s) __attribute_malloc__;
266     static void skip_whitespace(void);
267     static void free_bool(struct boolexp_node *b);
268     static struct boolexp_node *test_atr(char *s, char c);
269     static struct boolexp_node *parse_boolexp_R(void);
270     static struct boolexp_node *parse_boolexp_L(void);
271     static struct boolexp_node *parse_boolexp_O(void);
272     static struct boolexp_node *parse_boolexp_C(void);
273     static struct boolexp_node *parse_boolexp_I(void);
274     static struct boolexp_node *parse_boolexp_A(void);
275     static struct boolexp_node *parse_boolexp_F(void);
276     static struct boolexp_node *parse_boolexp_T(void);
277     static struct boolexp_node *parse_boolexp_E(void);
278     static int check_attrib_lock(dbref player, dbref target,
279                                  const char *atrname, const char *str);
280     static void free_boolexp_node(struct boolexp_node *b);
281     static int gen_label_id(struct bvm_asm *a);
282     static void append_insn(struct bvm_asm *a, bvm_opcode op, int arg,
283                             const char *s);
284     static struct bvm_asm *generate_bvm_asm(struct boolexp_node *b)
285  __attribute_malloc__;
286     static void generate_bvm_asm1(struct bvm_asm *a, struct boolexp_node *b,
287                                   boolexp_type outer);
288     static size_t pos_of_label(struct bvm_asm *a, int label);
289     static size_t offset_to_string(struct bvm_asm *a, int c);
290     static struct bvm_asmnode *insn_after_label(struct bvm_asm *a, int label);
291     static void opt_thread_jumps(struct bvm_asm *a);
292     static void optimize_bvm_asm(struct bvm_asm *a);
293     static boolexp emit_bytecode(struct bvm_asm *a, int derefs);
294     static void free_bvm_asm(struct bvm_asm *a);
295 #ifdef DEBUG_BYTECODE
296     static int sizeof_boolexp_node(struct boolexp_node *b);
297     static void print_bytecode(boolexp b);
298 #endif
299     extern void complain
300       (dbref player, dbref i, const char *name, const char *desc, ...)
301   __attribute__ ((__format__(__printf__, 4, 5)));
302     void check_lock(dbref player, dbref i, const char *name, boolexp be);
303     int warning_lock_type(const boolexp l);
304
305
306 /** String tree of attribute names. Used in the parse tree. Might go
307  * away as the trees aren't persistant any more. */
308     extern StrTree atr_names;
309 /** String tree of lock names. Used in the parse tree. Might go away
310  * as the trees aren't persistant any more. */
311     extern StrTree lock_names;
312 /** Are we currently loading the db? If so, we avoid certain checks that
313  * would create a circularity.
314  */
315     extern int loading_db;
316
317 /** Given a chunk id, return the bytecode for a boolexp. 
318  * \param b the boolexp to retrieve.
319  * \return a malloced copy of the bytecode.
320  */
321     static uint8_t *safe_get_bytecode(boolexp b)
322 {
323   uint8_t *bytecode;
324   uint16_t len;
325
326   len = chunk_len(b);
327   bytecode = mush_malloc(len, "boolexp.bytecode");
328   chunk_fetch(b, bytecode, len);
329   return bytecode;
330 }
331
332 /** Given a chunk id, return the bytecode for a boolexp. 
333  * \param b The boolexp to retrieve.
334  * \param storelen the length of the bytecode.
335  * \return a static copy of the bytecode.
336  */
337 static uint8_t *
338 get_bytecode(boolexp b, uint16_t * storelen)
339 {
340   static uint8_t bytecode[BUFFER_LEN * 2];
341   uint16_t len;
342
343   len = chunk_fetch(b, bytecode, sizeof bytecode);
344   if (storelen)
345     *storelen = len;
346   return bytecode;
347 }
348
349 /* Public functions */
350
351 /** Copy a boolexp.
352  * This function makes a copy of a boolexp, allocating new memory for
353  * the copy.
354  * \param b a boolexp to copy.
355  * \return an allocated copy of the boolexp.
356  */
357 boolexp
358 dup_bool(boolexp b)
359 {
360   boolexp r;
361   uint8_t *bytecode;
362   uint16_t len = 0;
363
364   if (b == TRUE_BOOLEXP)
365     return TRUE_BOOLEXP;
366
367   bytecode = get_bytecode(b, &len);
368
369   r = chunk_create(bytecode, len, 1);
370
371   return r;
372 }
373
374 /** Free a boolexp
375  * This function deallocates a boolexp
376  * \param b a boolexp to delete
377  */
378 void
379 free_boolexp(boolexp b)
380 {
381   if (b != TRUE_BOOLEXP)
382     chunk_delete(b);
383 }
384
385 /** Determine the memory usage of a boolexp.
386  * This function computes the total memory usage of a boolexp.
387  * \param b boolexp to analyze.
388  * \return size of boolexp in bytes.
389  */
390 int
391 sizeof_boolexp(boolexp b)
392 {
393
394   if (b == TRUE_BOOLEXP)
395     return 0;
396   else
397     return (int) chunk_len(b);
398 }
399
400 /** Evaluate a boolexp.
401  * This is the main function to be called by other hardcode. It
402  * determines whether a player can pass a boolexp lock on a given
403  * object.
404  * \param player the player trying to pass the lock.
405  * \param b the boolexp to evaluate.
406  * \param target the object with the lock.
407  * \retval 0 player fails to pass lock.
408  * \retval 1 player successfully passes lock.
409  */
410 int
411 eval_boolexp(dbref player /* The player trying to pass */ ,
412              boolexp b /* The boolexp */ ,
413              dbref target /* The object with the lock */ )
414 {
415   static int boolexp_recursion = 0;
416
417   if (!GoodObject(player))
418     return 0;
419
420   if (boolexp_recursion > MAX_DEPTH) {
421     notify(player, T("Too much recursion in lock!"));
422     return 0;
423   }
424   if (b == TRUE_BOOLEXP) {
425     return 1;
426   } else {
427     bvm_opcode op;
428     int arg;
429     ATTR *a;
430     int r = 0;
431     char *s = NULL;
432     uint8_t *bytecode, *pc;
433
434     bytecode = pc = safe_get_bytecode(b);
435
436     while (1) {
437       op = (bvm_opcode) *pc;
438       memcpy(&arg, pc + 1, sizeof arg);
439       pc += INSN_LEN;
440       switch (op) {
441       case OP_RET:
442         goto done;
443       case OP_JMPT:
444         if (r)
445           pc = bytecode + arg;
446         break;
447       case OP_JMPF:
448         if (!r)
449           pc = bytecode + arg;
450         break;
451       case OP_LABEL:
452       case OP_PAREN:
453         break;
454       case OP_LOADS:
455         s = (char *) (bytecode + arg);
456         break;
457       case OP_LOADR:
458         r = arg;
459         break;
460       case OP_NEGR:
461         r = !r;
462         break;
463       case OP_TCONST:
464         r = (GoodObject(arg)
465              && !IsGarbage(arg)
466              && (arg == player || member(arg, Contents(player))));
467         break;
468       case OP_TIS:
469         r = (GoodObject(arg)
470              && !IsGarbage(arg)
471              && arg == player);
472         break;
473       case OP_TCARRY:
474         r = (GoodObject(arg)
475              && !IsGarbage(arg)
476              && member(arg, Contents(player)));
477         break;
478       case OP_TOWNER:
479         r = (GoodObject(arg)
480              && !IsGarbage(arg)
481              && Owner(arg) == Owner(player));
482         break;
483       case OP_TIND:
484         /* We only allow evaluation of indirect locks if target can run
485          * the lock on the referenced object.
486          */
487         boolexp_recursion++;
488         if (!GoodObject(arg) || IsGarbage(arg))
489           r = 0;
490         else if (!Can_Read_Lock(target, arg, s))
491           r = 0;
492         else
493           r = eval_boolexp(player, getlock(arg, s), arg);
494         boolexp_recursion--;
495         break;
496       case OP_TATR:
497         boolexp_recursion++;
498         a = atr_get(player, s);
499         if (!a || !Can_Read_Attr(target, player, a))
500           r = 0;
501         else {
502           char tbuf[BUFFER_LEN];
503           strcpy(tbuf, atr_value(a));
504           r = local_wild_match((char *) bytecode + arg, tbuf);
505         }
506         boolexp_recursion--;
507         break;
508       case OP_TEVAL:
509         boolexp_recursion++;
510         r = check_attrib_lock(player, target, s, (char *) bytecode + arg);
511         boolexp_recursion--;
512         break;
513       case OP_TFLAG:
514         /* Note that both fields of a boolattr struct are upper-cased */
515         if (sees_flag("FLAG", target, player, (char *) bytecode + arg))
516           r = 1;
517         else
518           r = 0;
519         break;
520       case OP_TPOWER:
521         if (sees_flag("POWER", target, player, (char *) bytecode + arg))
522           r = 1;
523         else
524           r = 0;
525         break;
526       case OP_TOBJID:
527         {
528           dbref d;
529           d = parse_objid((char *) bytecode + arg);
530           r = (player == d);
531           break;
532         }
533       case OP_TCHANNEL:
534         {
535           CHAN *chan;
536           boolexp_recursion++;
537           find_channel((char *) bytecode + arg, &chan, target);
538           r = chan && onchannel(player, chan);
539           boolexp_recursion--;
540         }
541         break;
542       case OP_TIP:
543         boolexp_recursion++;
544         if (!Connected(Owner(player)))
545           r = 0;
546         else {
547           /* We use the attribute for permission checks, but we
548            * do the actual boolexp itself with the least idle
549            * descriptor's ip address.
550            */
551           a = atr_get(Owner(player), "LASTIP");
552           if (!a || !Can_Read_Attr(target, player, a))
553             r = 0;
554           else {
555             char *p = least_idle_ip(Owner(player));
556             r = p ? quick_wild((char *) bytecode + arg, p) : 0;
557           }
558         }
559         boolexp_recursion--;
560         break;
561       case OP_THOSTNAME:
562         boolexp_recursion++;
563         if (!Connected(Owner(player)))
564           r = 0;
565         else {
566           /* See comment for OP_TIP */
567           a = atr_get(Owner(player), "LASTSITE");
568           if (!a || !Can_Read_Attr(target, player, a))
569             r = 0;
570           else {
571             char *p = least_idle_hostname(Owner(player));
572             r = p ? quick_wild((char *) bytecode + arg, p) : 0;
573           }
574         }
575         boolexp_recursion--;
576         break;
577       case OP_TTYPE:
578         switch (bytecode[arg]) {
579         case 'R':
580           r = Typeof(player) == TYPE_ROOM;
581           break;
582         case 'E':
583           r = Typeof(player) == TYPE_EXIT;
584           break;
585         case 'T':
586           r = Typeof(player) == TYPE_THING;
587           break;
588         case 'P':
589           r = Typeof(player) == TYPE_PLAYER;
590           break;
591         }
592         break;
593       case OP_TDBREFLIST: