PennMUSH Community

root/1.8.3/trunk/src/chunk.c

Revision 919, 79.7 kB (checked in by shawnw, 1 year ago)

1.8.3p3

Line 
1 /**
2  * \file chunk.c
3  *
4  * \brief Chunk memory management system
5  *
6  * <h3>Synopsis:</h3>
7  * The chunk memory management system has three goals: to reduce overall
8  * memory consumption, to improve locality of reference, and to allow
9  * less-used sections of memory to be paged out to disk.  These three
10  * goals are accomplished by implementing an allocation management layer
11  * on top of malloc(), with significantly reduced overhead and the ability
12  * to rearrange allocations to actively control fragmentation and increase
13  * locality.
14  *
15  *
16  * <h3>Basic operation:</h3>
17  * The managed memory pool is divided into regions of approximately 64KB.
18  * These regions contain variable-size chunks representing allocated and
19  * available (free) memory.  No individual allocation may be larger than
20  * will fit in a single region, and no allocation may be smaller than one
21  * byte.  Each chunk has between two and four bytes of overhead (indicating
22  * the used/free status, the size of the chunk, and the number of
23  * dereferences for the chunk), and each region has additional overhead
24  * of about 42 bytes.
25  *
26  * Allocations are made with the chunk_create() call, which is given
27  * the size of the data, the data value to be stored, and an initial
28  * dereference count to be assigned to the chunk.  Once created, the
29  * value of a chunk cannot be changed; the storage is immutable.
30  * chunk_create() returns an integral reference value that can be
31  * used to retrieve or free the allocation.
32  *
33  * Allocations are accessed with the chunk_fetch(), chunk_len(), and
34  * chunk_derefs() calls.  Each of these if given a reference (as
35  * returned by chunk_create()), and chunk_fetch() is additionally
36  * given a buffer and length to fill with the allocated value.  Both
37  * chunk_fetch() and chunk_len() increment a chunk's dereference
38  * count (up to the maximum of 255), which is used in migration to
39  * improve locality.
40  *
41  * Allocations are freed with the chunk_delete() call, which also
42  * requires a reference as input.
43  *
44  * Finally, allocations are allowed to rearrange themselves with the
45  * chunk_migration() call.  chunk_migration() takes an array of
46  * pointers to chunk references as input, and examines each of the
47  * indicated chunks to see which need to be moved to improve the
48  * distribution of allocations.  If any allocations are moved, then
49  * the references to the moved allocations are updated in place
50  * (hence the array of pointers to references, instead of just an
51  * array of references).  Migration may be done incrementally by
52  * submitting only a portion of the allocations with each call to
53  * chunk_migration(); however, _all_ allocations made with chunk_create()
54  * must eventually be submitted for migration in order to maintain the
55  * memory pool in a non-fragmented state.
56  *
57  *
58  * <h3>Migration:</h3>
59  * Under normal conditions, extended use of this chunk allocation system
60  * would lead to a significantly fragmented datastore, unless there was
61  * some means to defragment the storage arena.  In the long run, this could
62  * be very bad, leading to quite a mess.  Calling chunk_migration() gives
63  * the allocator permission to move allocations around both to defragment
64  * the arena and to improve locality of reference (by making sure that
65  * all the infrequently used chunks are segregated from the chunks in
66  * active use).  Of course, moving all the allocated chunks at once would
67  * be a slow and painful process.  Instead, migration may be done
68  * incrementally, giving permission to move a small number of chunks
69  * at any one time, and spreading out the cost of defragmenting the
70  * data store.
71  *
72  * Just because you give permission to move a chunk doesn't mean that it
73  * will be moved.  The chunk may be perfectly happy where it is, with
74  * no need to move it elsewhere.  Chunks are only moved when their
75  * personal happiness would be improved by a move.  In general, maximizing
76  * the happiness of individual chunks will improve the happiness of the
77  * whole.
78  *
79  * There are two things that factor into a chunk's happiness.
80  * The things that make a chunk unhappy are:
81  * <ul>
82  * <li> Having a dereference count different from the region average.
83  *      The greater the difference, the more unhappy the chunk is.
84  * <li> Being in a sparsely populated region.  The fewer chunks in a
85  *      region, the more unhappy the chunks in it.
86  * </ul>
87  * Neither of these factors are absolute; both of them have different
88  * weights that add into a general unhappiness for the chunk.  The lower
89  * the unhappiness, the better.
90  *
91  * Over time and usage, the dereference counts for chunks will increase
92  * and eventually reach a maximum value of 255.  (The count is limited
93  * by the fact that it's stored in a single byte for each chunk.)  If
94  * this is left unchecked, eventually all chunks would have a dereference
95  * count of 255, and the counts would be useless for improving locality.
96  * To counteract this, when the average dereference count for a certain
97  * number of regions exceeds 128, the 'migration period' is incremented
98  * and all chunk dereference counts are halved.  The critical number of
99  * regions is determined based on the cache size and the total number of
100  * regions.  If you're not using forking dumps, then period change should
101  * be controlled primarily by the frequency of database dumps (which end
102  * up incrementing the dereference count on all chunks, and thus all
103  * regions).  Given a dump frequency of once per hour (the default), there
104  * should be a period change about every 2.6 days.
105  *
106  *
107  * <h3>Statistics:</h3>
108  * The chunk memory management system keeps several statistics about
109  * the allocation pool, both to maintain good operation through active
110  * encouragement of locality, and to satisfy the curiosity of people
111  * using the system (and its designer ;-)).  These statistics are
112  * reported (in PennMUSH) through the use of the @stats command,
113  * with /chunks switch.
114  *
115  * @stats/chunks generates output similar to this:
116  * \verbatim
117  * Chunks:         99407 allocated (   8372875 bytes,     223808 ( 2%) overhead)
118  *                   74413 short     (   1530973 bytes,     148826 ( 9%) overhead)
119  *                   24994 medium    (   6841902 bytes,      74982 ( 1%) overhead)
120  *                       0 long      (         0 bytes,          0 ( 0%) overhead)
121  *                   128 free      (   1319349 bytes,      23058 ( 1%) fragmented)
122  * Regions:          147 total,       16 cached
123  * Paging:        158686 out,     158554 in
124  * Storage:      9628500 total (86% saturation)
125  *
126  * Period:             1 (   5791834 accesses so far,       1085 chunks at max)
127  * Migration:     245543 moves this period
128  *                  145536 slide
129  *                      45 away
130  *                   30719 fill exact
131  *                   69243 fill inexact
132  * \endverbatim
133  * First, the number of allocated chunks is given, along with their
134  * total size and overhead.  Then, the allocated chunks are broken
135  * up by size-range; short chunks (2 to 63 bytes) with two bytes of
136  * overhead each, medium chunks (64 to 8191 bytes) with three bytes
137  * of overhead each, and long chunks (8192 to ~64K bytes) with four
138  * bytes of overhead each.  Rounding out the individual chunk statistics
139  * is the number of free chunks, their total size, and the amount of
140  * fragmented free space (free space not in the largest free chunk for
141  * its region is considered fragmented).
142  *
143  * Next comes statistics on regions: the number of regions in use
144  * and the number held in the memory cache.  All regions not in the
145  * cache are paged out to disk.  Paging statistics follow, listing
146  * the number of times a region has been moved out of or into
147  * memory cache.  After that, the total amount of storage (in memory
148  * or on disk) used is given, along with the saturation rate (where
149  * saturation is indicated by what fraction of the used space is
150  * actually allocated in chunks).
151  *
152  * Finally comes statistics on migration and the migration period.
153  * The period number is listed, along with the total number of
154  * dereferences in the period and how many chunks have the maximum
155  * dereference count of 255.  Then the amount of migration movement
156  * is listed, both in total and broken up by category.  Slides occur
157  * when an allocation is shifted to the other side of a neighboring
158  * free space.  Away moves are made when an allocation is extremely
159  * unhappy where it is, and is pushed out to somewhere else.  Fills
160  * are when an allocation is moved in order to fill in a free space;
161  * the space can be either exactly filled by the move, or inexactly
162  * filled (leaving some remaining free space).
163  *
164  *
165  * <h3>Histograms:</h3>
166  * The chunk memory management system can also display a few
167  * histograms about itself.  These histograms are reported (in PennMUSH)
168  * through the use of the @stats command, with the /regions, /freespace,
169  * or /paging switches.
170  *
171  * All of @stats/regions, @stats/freespace, and @stas/paging produce
172  * histograms vs. region average dereference count.  The histograms
173  * use buckets four counts wide, so all regions from 0-3 will be in
174  * the first bucket, 4-7 in the second, etc., up to 252-255 in the
175  * last bucket.  If the heights of the buckets are significantly
176  * different, then the highest spikes will be allowed to extend off
177  * the top of the histogram (with their real values labeled in
178  * parenthesis next to them).
179  *
180  * @stats/regions is a histogram of how many regions at each count
181  * currently exist.  In a healthy game, there should be a large spike
182  * at some dereference count between 64 and 128 (representing the
183  * largely unused portion of the database), a lesser spike at 255
184  * (representing the portion of the database that's used very frequently),
185  * and a smattering of regions at other counts, with either new areas
186  * of the database (below the large spike) or semi-frequently used
187  * areas (above the large spike).  New migration periods occur when
188  * the large spike would pass 128, at which point everything is halved
189  * and the spike is pushed back down to 64.
190  *
191  * @stats/freespace is a histogram of how much free space exists in
192  * regions at each dereference count.  This histogram is included
193  * to aid in diagnosis of the cause for dropping saturation rates.
194  *
195  * @stats/paging is a histogram of the number of regions being paged
196  * in or out at each dereference count.  As of this writing, a very
197  * unhealthy behaviour is observed, wherein the histogram shows a
198  * trapeziod between 64 and 128, drowning out most of the rest of the
199  * chart.  This indicates that as time goes on, the attributes
200  * associated with a single low-use object are getting scattered
201  * randomly throughout all the low-use regions, and thus when dumps
202  * occur (with their linear enumeration of all attributes on objects)
203  * the low-use regions thrash in and out of cache.  This can be very
204  * detrimental to dump performance.  Something will have to be done
205  * to fix this tendency of migration.  Healthy behaviour will make
206  * some other pattern in the paging histogram which has not yet been
207  * determined.
208  */
209
210 #include "copyrite.h"
211 #include "config.h"
212 #include "conf.h"
213
214 #include <limits.h>
215 #include <string.h>
216 #include <stdarg.h>
217 #include <stdlib.h>
218
219 #include <fcntl.h>
220 #include <assert.h>
221 #include <sys/types.h>
222 #ifdef WIN32
223 #include <wtypes.h>
224 #include <io.h>
225 #else
226 #include <unistd.h>
227 #endif
228 #include <errno.h>
229
230 #include "externs.h"
231 #include "chunk.h"
232 #include "command.h"
233 #include "dbdefs.h"
234 #include "intrface.h"
235 #include "log.h"
236 #include "mymalloc.h"
237 #include "confmagic.h"
238
239 #ifdef WIN32
240 #pragma warning( disable : 4761)        /* disable warning re conversion */
241 #endif
242
243 /* A whole bunch of debugging #defines. */
244 /** Basic debugging stuff - are assertions checked? */
245 #undef CHUNK_DEBUG
246 /** Paranoid people check for region validity after every operation
247  * that modifies a region. */
248 #undef CHUNK_PARANOID
249 /** Log all moves and slides during migration. */
250 #undef DEBUG_CHUNK_MIGRATE
251 /** Log creation of regions. */
252 #undef DEBUG_CHUNK_REGION_CREATE
253 /** Log paging of regions. */
254 #undef DEBUG_CHUNK_PAGING
255 /** Log all mallocs. */
256 #undef DEBUG_CHUNK_MALLOC
257
258 /** For debugging, we keep a rolling log of debug messages.
259  * These get dumped to disk if we're about to panic.
260  */
261 #define ROLLING_LOG_SIZE 200
262 #define ROLLING_LOG_ENTRY_LEN 1024
263
264 /* debug... */
265 #ifdef CHUNK_DEBUG
266 #define ASSERT(x) assert(x)
267 #else                           /* CHUNK_DEBUG */
268 static int ignore;      /**< Used to shut up compiler warnings when not asserting */
269 #define ASSERT(x) ignore++
270 #endif                          /* CHUNK_DEBUG */
271
272 /*
273  * Sizes, limits, etc.
274  */
275 /** Region size, including header.
276  * This is a little less than 64K to allow for malloc overhead without
277  * spilling into next page */
278 #define REGION_SIZE 65500
279
280 /** Region capacity.
281  * This is the size minus the fixed region overhead.
282  */
283 #define REGION_CAPACITY (REGION_SIZE - FIRST_CHUNK_OFFSET_IN_REGION)
284
285 /** Maximum chunk length.
286  * This is fairly arbitrary, but must be less than
287  * REGION_CAPACITY (it must fit in a region).
288  */
289 #define MAX_CHUNK_LEN (16384-1)
290
291 /** Number of oddballs tracked in regions.
292  * This is used to figure out when we should pull regions in because
293  * we have an opportunity to migrate chunks that don't match.
294  * Relatively arbitrary; too low means you don't move things out
295  * enough, but boosting it too high wastes memory.
296  */
297 #define NUM_ODDBALLS 10
298
299 /** Minimum disagreement to be an oddball.
300  * This is used to figure out when we should pull regions in because
301  * we have an opportunity to migrate chunks that don't match.
302  * Relatively arbitrary; too low means you don't move things out
303  * enough, but boosting it too high wastes migration time.
304  */
305 #define ODDBALL_THRESHOLD 8
306
307 /*
308  * FIXME: pulling config variables out of my left ear. Fix later.
309  */
310 /** How much space is initially allocated for the in-memory region array? */
311 #define FIXME_INIT_REGION_LEN 20
312 /** How much does the region array grow by each time it has to grow? */
313 #define FIXME_REGION_ARRAY_INCREMENT 10
314
315 /** Limit for when being a nearly-empty region counts against being
316  * a good region.  This is exponential: an empty region gets a penalty
317  * of 1 << LONLINESS_LIMIT.  A near-empty region gets a penalty of
318  * 1 << (LONLINESS_LIMIT - used_count).
319  *
320  * Rationale: we don't want to reuse empty regions (or make new regions)
321  * for trivialities.
322  */
323 #define LONLINESS_LIMIT 5
324
325 /** Free space limit for when we consider making new regions.
326  * The total free space must be less than this percent of capacity.
327  *
328  * Rationale: we don't want to waste memory with lots of extra regions.
329  */
330 #define FREE_PERCENT_LIMIT 2
331
332 /** Bias for allocating chunks in a region that's already in memory.
333  * Actually, this is a bias against allocating in swapped-out regions,
334  * but that's a nit...
335  *
336  * Rationale: reduce the amount of paging during migration.
337  */
338 #define IN_MEMORY_BIAS 4
339
340 /*
341  *  Structures and Accessor Macros
342  */
343 /*
344  * What a chunk_reference_t looks like from the inside
345  */
346 /** Get the region from a chunk_reference_t. */
347 #define ChunkReferenceToRegion(ref) ((ref) >> 16)
348 /** Get the offset from a chunk_reference_t. */
349 #define ChunkReferenceToOffset(ref) ((ref) & 0xFFFF)
350 /** Make a chunk_reference_t from a region and offset. */
351 #define ChunkReference(region, offset) \
352   ((chunk_reference_t)(((region) << 16) | (offset)))
353
354 /** Sentinel value used to mark unused cache regions. */
355 #define INVALID_REGION_ID 0xffff
356
357 /**
358  * \verbatim
359  * The chunk headers look like this:
360  *
361  * Short:
362  * byte 0     byte 1
363  *  76543210   76543210
364  * +--------+ +--------+
365  * |ft len  | | deref  |
366  * +--------+ +--------+
367  *  ||\__6_/   \___8__/
368  *  ||   |         `----- deref count (decays)
369  *  ||   `--------------- length of data
370  *  |`------------------- tag bit (0 for short)
371  *  `-------------------- free flag (0 for allocated, 1 for free)
372  *
373  * Medium:
374  * byte 0     byte 1     byte 2
375  *  76543210   76543210   76543210
376  * +--------+ +--------+ +--------+
377  * |ftg lenM| | deref  | |  lenL  |
378  * +--------+ +--------+ +--------+
379  *  |||\_5_/   \___8__/   \___8__/
380  *  |||  |         |          `----- length, least significant
381  *  |||  |         `---------------- deref count (decays)
382  *  |||  `-------------------------- length, most significant
383  *  ||`----------------------------- tag bit (0 for medium)
384  *  |`------------------------------ tag bit (1 for medium/long)
385  *  `------------------------------- free flag
386  *
387  * Long:
388  * byte 0     byte 1     byte 2     byte 3
389  *  76543210   76543210   76543210   76543210
390  * +--------+ +--------+ +--------+ +--------+
391  * |ftg     | | deref  | |  lenM  | |  lenL  |
392  * +--------+ +--------+ +--------+ +--------+
393  *  |||        \___8__/   \_______16________/
394  *  |||            |               `----------- length of data
395  *  |||            `--------------------------- deref count (decays)
396  *  ||`---------------------------------------- tag bit (1 for long)
397  *  |`----------------------------------------- tag bit (1 for medium/long)
398  *  `------------------------------------------ free flag
399  *
400  *
401  * Note in particular that the dereference count is always in the second
402  * byte of a chunk, to simplify the access logic.
403  * \endverbatim
404  */
405
406 /*
407  * Fields in chunk headers
408  */
409 #define CHUNK_FREE_MASK   0x80
410 #define CHUNK_FREE        0x80
411 #define CHUNK_USED        0x00
412
413 #define CHUNK_TAG1_MASK   0x40
414 #define CHUNK_TAG1_SHORT  0x00
415 #define CHUNK_TAG1_MEDIUM 0x40
416 #define CHUNK_TAG1_LONG   0x40
417 #define CHUNK_TAG1_OFFSET 0
418
419 #define CHUNK_TAG2_MASK   0x20
420 #define CHUNK_TAG2_MEDIUM 0x00
421 #define CHUNK_TAG2_LONG   0x20
422 #define CHUNK_TAG2_OFFSET 0
423
424 #define CHUNK_SHORT_LEN_MASK 0x3F
425 #define CHUNK_SHORT_LEN_OFFSET 0
426
427 #define CHUNK_MEDIUM_LEN_MSB_MASK 0x1F
428 #define CHUNK_MEDIUM_LEN_LSB_MASK 0xFF
429 #define CHUNK_MEDIUM_LEN_MSB_OFFSET 0
430 #define CHUNK_MEDIUM_LEN_LSB_OFFSET 2
431
432 #define CHUNK_LONG_LEN_MSB_MASK 0xFF
433 #define CHUNK_LONG_LEN_LSB_MASK 0xFF
434 #define CHUNK_LONG_LEN_MSB_OFFSET 2
435 #define CHUNK_LONG_LEN_LSB_OFFSET 3
436
437 #define CHUNK_DEREF_OFFSET 1
438
439 #define CHUNK_DEREF_MAX 0xFF
440
441 #define CHUNK_AGED_MASK   0x10
442 #define CHUNK_AGED_OFFSET 0
443
444 #define CHUNK_SHORT_DATA_OFFSET 2
445 #define CHUNK_MEDIUM_DATA_OFFSET 3
446 #define CHUNK_LONG_DATA_OFFSET 4
447
448 #define MIN_CHUNK_LEN 1
449 #define MIN_REMNANT_LEN (CHUNK_SHORT_DATA_OFFSET + MIN_CHUNK_LEN)
450
451 #define MAX_SHORT_CHUNK_LEN CHUNK_SHORT_LEN_MASK
452 #define MAX_MEDIUM_CHUNK_LEN \
453         ((CHUNK_MEDIUM_LEN_MSB_MASK << 8) | CHUNK_MEDIUM_LEN_LSB_MASK)
454 #define MAX_LONG_CHUNK_LEN \
455         (REGION_CAPACITY - CHUNK_LONG_DATA_OFFSET)
456
457 #define LenToFullLen(len) \
458   ((len) + \
459    (((len) > MAX_SHORT_CHUNK_LEN) \
460     ? ((len) > MAX_MEDIUM_CHUNK_LEN) \
461       ? CHUNK_LONG_DATA_OFFSET \
462       : CHUNK_MEDIUM_DATA_OFFSET \
463     : CHUNK_SHORT_DATA_OFFSET))
464
465 #define ChunkPointer(region, offset) \
466   (((unsigned char *)(regions[(region)].in_memory)) + (offset))
467 #define ChunkReferenceToPointer(ref) \
468   ChunkPointer(ChunkReferenceToRegion((ref)), ChunkReferenceToOffset((ref)))
469
470 /*
471  * Macros for probing and manipulating chunk headers
472  */
473 #define CPLenShort(cptr) \
474   ((cptr)[CHUNK_SHORT_LEN_OFFSET] & CHUNK_SHORT_LEN_MASK)
475 #define CPLenMedium(cptr) \
476   ((((cptr)[CHUNK_MEDIUM_LEN_MSB_OFFSET] & CHUNK_MEDIUM_LEN_MSB_MASK) << 8) + \
477    ((cptr)[CHUNK_MEDIUM_LEN_LSB_OFFSET] & CHUNK_MEDIUM_LEN_LSB_MASK))
478 #define CPLenLong(cptr) \
479   ((((cptr)[CHUNK_LONG_LEN_MSB_OFFSET] & CHUNK_LONG_LEN_MSB_MASK) << 8) + \
480    ((cptr)[CHUNK_LONG_LEN_LSB_OFFSET] & CHUNK_LONG_LEN_LSB_MASK))
481 #define CPLen(cptr) \
482   ((*(cptr) & CHUNK_TAG1_MASK) \
483    ? (*(cptr) & CHUNK_TAG2_MASK) \
484      ? CPLenLong((cptr)) \
485      : CPLenMedium((cptr)) \
486    : CPLenShort((cptr)))
487 #define ChunkLen(region, offset) \
488   CPLen(ChunkPointer((region), (offset)))
489 #define CPFullLen(cptr) \
490   ((*(cptr) & CHUNK_TAG1_MASK) \
491    ? (*(cptr) & CHUNK_TAG2_MASK) \
492      ? (CPLenLong((cptr)) + CHUNK_LONG_DATA_OFFSET) \
493      : (CPLenMedium((cptr)) + CHUNK_MEDIUM_DATA_OFFSET) \
494    : (CPLenShort((cptr)) + CHUNK_SHORT_DATA_OFFSET))
495 #define ChunkFullLen(region, offset) \
496   CPFullLen(ChunkPointer((region), (offset)))
497
498 #define ChunkIsFree(region, offset) \
499   ((*ChunkPointer((region), (offset)) & CHUNK_FREE_MASK) == CHUNK_FREE)
500 #define ChunkIsShort(region, offset) \
501   ((*ChunkPointer((region), (offset)) & CHUNK_TAG1_MASK) == CHUNK_TAG1_SHORT)
502 #define ChunkIsMedium(region, offset) \
503   ((*ChunkPointer((region), (offset)) & (CHUNK_TAG1_MASK | CHUNK_TAG2_MASK)) \
504    == (CHUNK_TAG1_MEDIUM | CHUNK_TAG2_MEDIUM))
505 #define ChunkIsLong(region, offset) \
506   ((*ChunkPointer((region), (offset)) & (CHUNK_TAG1_MASK | CHUNK_TAG2_MASK)) \
507    == (CHUNK_TAG1_LONG | CHUNK_TAG2_LONG))
508
509 #define ChunkDerefs(region, offset) \
510   (ChunkPointer((region), (offset))[CHUNK_DEREF_OFFSET])
511
512 #define CPDataPtr(cptr) \
513   ((*(cptr) & CHUNK_TAG1_MASK) \
514    ? (*(cptr) & CHUNK_TAG2_MASK) \
515      ? (cptr) + CHUNK_LONG_DATA_OFFSET \
516      : (cptr) + CHUNK_MEDIUM_DATA_OFFSET \
517    : (cptr) + CHUNK_SHORT_DATA_OFFSET)
518 #define ChunkDataPtr(region, offset) \
519   (CPDataPtr(ChunkPointer(region, offset)))
520
521 #define ChunkNextFree(region, offset) \
522   ((ChunkDataPtr(region, offset)[0] << 8) + ChunkDerefs(region, offset))
523
524 /* 0 for no, 1 for yes with room, 2 for exact */
525 #define FitsInSpace(size, capacity) \
526   (((size) == (capacity)) ? 2 : ((size) <= (capacity) - MIN_REMNANT_LEN))
527
528 /** Region info that gets paged out with its region.
529  * This is at the start of the region;
530  * the rest of the 64K bytes of the region contain chunks.
531  */
532 typedef struct region_header {
533   uint16_t region_id;           /**< will be INVALID_REGION_ID if not in use */
534   uint16_t first_free;          /**< offset of 1st free chunk */
535   struct region_header *prev;   /**< linked list prev for LRU cache */
536   struct region_header *next;   /**< linked list next for LRU cache */
537 } RegionHeader;
538
539 #define FIRST_CHUNK_OFFSET_IN_REGION sizeof(RegionHeader)
540
541 /** In-memory (never paged) region info.  */
542 typedef struct region {
543   uint16_t used_count;          /**< number of used chunks */
544   uint16_t free_count;          /**< number of free chunks */
545   uint16_t free_bytes;          /**< number of free bytes (with headers) */
546   uint16_t largest_free_chunk;  /**< largest single free chunk */
547   uint32_t total_derefs;        /**< total of all used chunk derefs */
548   uint32_t period_last_touched; /**< "this" period, for deref counts;
549                                            we don't page in regions to update
550                                            counts on period change! */
551   RegionHeader *in_memory;      /**< cache entry; NULL if paged out */
552   uint16_t oddballs[NUM_ODDBALLS];      /**< chunk offsets with odd derefs */
553 } Region;
554
555 #define RegionDerefs(region) \
556   (regions[(region)].used_count \
557    ? (regions[(region)].total_derefs >> \
558       (curr_period - regions[(region)].period_last_touched)) / \
559      regions[(region)].used_count \
560    : 0)
561 #define RegionDerefsWithChunk(region, derefs) \
562    (((regions[(region)].total_derefs >> \
563      (curr_period - regions[(region)].period_last_touched)) + derefs) / \
564     (regions[(region)].used_count + 1))
565
566 /*
567  *  Globals
568  */
569
570 /** Swap File */
571 #ifdef WIN32
572 typedef HANDLE fd_type;
573 static HANDLE swap_fd;
574 static HANDLE swap_fd_child = INVALID_HANDLE_VALUE;
575 #else
576 typedef int fd_type;
577 static int swap_fd;
578 static int swap_fd_child = -1;
579 #endif
580 static char child_filename[300];
581
582 /** Deref scale control.
583  * When the deref counts get too big, the current period is incremented
584  * and all derefs are divided by 2. */
585 static uint32_t curr_period;
586
587 /*
588  * Info about all regions
589  */
590 static uint32_t region_count;   /**< regions in use */
591 static uint32_t region_array_len;       /**< length of regions array */
592 static Region *regions;         /**< regions array, realloced as (rarely) needed */
593
594 /*
595  * regions presently in memory
596  */
597 static uint32_t cached_region_count;    /**< number of regions in cache */
598 static RegionHeader *cache_head;        /**< most recently used region */
599 static RegionHeader *cache_tail;        /**< least recently used region */
600
601 /*
602  * statistics
603  */
604 static int stat_used_short_count;       /**< How many short chunks? */
605 static int stat_used_short_bytes;       /**< How much space in short chunks? */
606 static int stat_used_medium_count;      /**< How many medium chunks? */
607 static int stat_used_medium_bytes;      /**< How much space in medium chunks? */
608 static int stat_used_long_count;        /**< How many long chunks? */
609 static int stat_used_long_bytes;        /**< How much space in long chunks? */
610 static int stat_deref_count;            /**< Dereferences this period */
611 static int stat_deref_maxxed;           /**< Number of chunks with max derefs */
612 /** histogram for average derefs of regions being paged in/out */
613 static int stat_paging_histogram[CHUNK_DEREF_MAX + 1];
614 static int stat_page_out;               /**< Number of page-outs */
615 static int stat_page_in;                /**< Number of page-ins */
616 static int stat_migrate_slide;          /**< Number of slide migrations */
617 static int stat_migrate_move;           /**< Number of move migrations */
618 static int stat_migrate_away;           /**< Number of chunk evictions */
619 static int stat_create;                 /**< Number of chunk creations */
620 static int stat_delete;                 /**< Number of chunk deletions */
621
622
623
624 /*
625  * migration globals that are used for holding relevant data...
626  */
627 static int m_count;             /**< The used length for the arrays. */
628 static chunk_reference_t **m_references; /**< The passed-in references array. */
629
630 #ifdef CHUNK_PARANOID
631 /** Log of recent actions for debug purposes */
632 static char rolling_log[ROLLING_LOG_SIZE][ROLLING_LOG_ENTRY_LEN];
633 static int rolling_pos;
634 static int noisy_log = 0;
635 #endif
636
637 /*
638  * Forward decls
639  */
640 static void find_oddballs(uint16_t region);
641
642 /*
643  * Debug routines
644  */
645 /** Add a message to the rolling log. */
646 static void
647 debug_log(char const *format, ...)
648 {
649 #ifdef CHUNK_PARANOID
650   va_list args;
651
652   va_start(args, format);
653   vsprintf(rolling_log[rolling_pos], format, args);
654   va_end(args);
655
656   rolling_log[rolling_pos][ROLLING_LOG_ENTRY_LEN - 1] = '\0';
657   if (noisy_log) {
658     fprintf(tracelog_fp, "%s\n", rolling_log[rolling_pos]);
659     fflush(tracelog_fp);
660   }
661   rolling_pos = (rolling_pos + 1) % ROLLING_LOG_SIZE;
662 #else
663   if (format)
664     return;                     /* shut up the compiler warning */
665 #endif
666 }
667
668 #ifdef CHUNK_PARANOID
669 /** Dump the rolling log. */
670 static void
671 dump_debug_log(FILE * fp)
672 {
673   int j;
674   fputs("Recent chunk activity:\n", fp);
675   j = rolling_pos;
676   do {
677     if (rolling_log[j][0]) {
678       fputs(rolling_log[j], fp);
679       fputc('\n', fp);
680       rolling_log[j][0] = '\0';
681     }
682     j = (j + 1) % ROLLING_LOG_SIZE;
683   } while (j != rolling_pos);
684   fputs("End of recent chunk activity.\n", fp);
685   fflush(fp);
686 }
687
688 /** Test if a chunk is migratable. */
689 static int
690 migratable(uint16_t region, uint16_t offset)
691 {
692   chunk_reference_t ref = ChunkReference(region, offset);
693   int j;
694
695   for (j = 0; j < m_count; j++)
696     if (m_references[j][0] == ref)
697       return 1;
698   return 0;
699 }
700
701 /** Give a detailed map of a region.
702  * Lists pertinent region information, and all the chunks in the region.
703  * Does not print the contents of the chunks (which would probably be
704  * unreadable, anyway).
705  * \param region the region to display.
706  * \param fp the FILE* to output to.
707  */
708 static void
709 debug_dump_region(uint16_t region, FILE * fp)
710 {
711   Region *rp = regions + region;
712   RegionHeader *rhp;
713   uint16_t offset, count;
714
715   ASSERT(region < region_count);
716   rhp = rp->in_memory;
717
718   fprintf(fp, "region: id:%04x period:%-8x deref:%-8x (%-2x per chunk)\n",
719           region, rp->period_last_touched, rp->total_derefs,
720           RegionDerefs(region));
721   fprintf(fp, "        #used:%-4x #free:%-4x fbytes:%-4x hole:%-4x ",
722           rp->used_count, rp->free_count, rp->free_bytes,
723           rp->largest_free_chunk);
724   if (rhp)
725     fprintf(fp, "first:%-4x h_id:%-4x\n", rhp->first_free, rhp->region_id);
726   else
727     fprintf(fp, "PAGED\n");
728   fflush(fp);
729
730   if (rhp) {
731     for (offset = FIRST_CHUNK_OFFSET_IN_REGION;
732          offset < REGION_SIZE; offset += ChunkFullLen(region, offset)) {
733       fprintf(fp, "chunk:%c%4s %-6s off:%04x full:%04x ",
734               migratable(region, offset) ? '*' : ' ',
735               ChunkIsFree(region, offset) ? "FREE" : "",
736               ChunkIsShort(region, offset) ? "SHORT" :
737               (ChunkIsMedium(region, offset) ? "MEDIUM" : "LONG"),
738               offset, ChunkFullLen(region, offset));
739       if (ChunkIsFree(region, offset)) {
740         fprintf(fp, "next:%04x\n", ChunkNextFree(region, offset));
741       } else {
742         fprintf(fp, "doff:%04x len:%04x ",
743                 ChunkDataPtr(region, offset) - (unsigned char *) rhp,
744                 ChunkLen(region, offset));
745