/* */
This source file includes the following definitions.
- mpool_open
- mpool_filter
- mpool_new
- mpool_get
- mpool_put
- mpool_close
- mpool_sync
- mpool_bkt
- mpool_write
- mpool_look
- mpool_stat
1 /*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #if defined(LIBC_SCCS) && !defined(lint)
31 static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94";
32 #endif /* LIBC_SCCS and not lint */
33
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>
36 #endif
37 #include <sys/stat.h>
38
39 #include <errno.h>
40 #include <stdio.h>
41 #ifdef STDC_HEADERS
42 #include <stdlib.h>
43 #endif
44 #ifdef HAVE_STRING_H
45 #include <string.h>
46 #else
47 #include <strings.h>
48 #endif
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52
53 #if (defined(_WIN32) && !defined(__CYGWIN__))
54 #define fsync _commit
55 #endif
56
57 #include "queue.h"
58 #include "db.h"
59
60 #define __MPOOLINTERFACE_PRIVATE
61 #include "mpool.h"
62
63 static BKT *mpool_bkt(MPOOL *);
64 static BKT *mpool_look(MPOOL *, pgno_t);
65 static int mpool_write(MPOOL *, BKT *);
66
67 /**
68 * mpool_open --
69 * Initialize a memory pool.
70 *
71 * @param key
72 * @param fd
73 * @param pagesize
74 * @param maxcache
75 */
76 MPOOL *
77 mpool_open(key, fd, pagesize, maxcache)
78 void *key;
79 int fd;
80 pgno_t pagesize, maxcache;
81 {
82 struct stat sb;
83 MPOOL *mp;
84 int entry;
85
86 /*
87 * Get information about the file.
88 *
89 * XXX
90 * We don't currently handle pipes, although we should.
91 */
92 if (fstat(fd, &sb))
93 return (NULL);
94 if (!S_ISREG(sb.st_mode)) {
95 errno = ESPIPE;
96 return (NULL);
97 }
98
99 /* Allocate and initialize the MPOOL cookie. */
100 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
101 return (NULL);
102 CIRCLEQ_INIT(&mp->lqh);
103 for (entry = 0; entry < HASHSIZE; ++entry)
104 CIRCLEQ_INIT(&mp->hqh[entry]);
105 mp->maxcache = maxcache;
106 mp->npages = sb.st_size / pagesize;
107 mp->pagesize = pagesize;
108 mp->fd = fd;
109 return (mp);
110 }
111
112 /**
113 * mpool_filter --
114 * Initialize input/output filters.
115 *
116 * @param mp
117 * @param pgin
118 * @param pgout
119 * @param pgcookie
120 */
121 void
122 mpool_filter(mp, pgin, pgout, pgcookie)
123 MPOOL *mp;
124 void (*pgin)(void *, pgno_t, void *);
125 void (*pgout)(void *, pgno_t, void *);
126 void *pgcookie;
127 {
128 mp->pgin = pgin;
129 mp->pgout = pgout;
130 mp->pgcookie = pgcookie;
131 }
132
133 /**
134 * mpool_new --
135 * Get a new page of memory.
136 *
137 * @param mp
138 * @param pgnoaddr
139 */
140 void *
141 mpool_new(mp, pgnoaddr)
142 MPOOL *mp;
143 pgno_t *pgnoaddr;
144 {
145 struct _hqh *head;
146 BKT *bp;
147
148 if (mp->npages == MAX_PAGE_NUMBER) {
149 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
150 abort();
151 }
152 #ifdef STATISTICS
153 ++mp->pagenew;
154 #endif
155 /*
156 * Get a BKT from the cache. Assign a new page number, attach
157 * it to the head of the hash chain, the tail of the lru chain,
158 * and return.
159 */
160 if ((bp = mpool_bkt(mp)) == NULL)
161 return (NULL);
162 *pgnoaddr = bp->pgno = mp->npages++;
163 bp->flags = MPOOL_PINNED;
164
165 head = &mp->hqh[HASHKEY(bp->pgno)];
166 CIRCLEQ_INSERT_HEAD(head, bp, hq);
167 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
168 return (bp->page);
169 }
170
171 /**
172 * mpool_get
173 * Get a page.
174 *
175 * @param mp
176 * @param pgno
177 * @param flags
178 */
179 void *
180 mpool_get(mp, pgno, flags)
181 MPOOL *mp;
182 pgno_t pgno;
183 u_int flags; /* XXX not used? */
184 {
185 struct _hqh *head;
186 BKT *bp;
187 off_t off;
188 int nr;
189
190 /* Check for attempt to retrieve a non-existent page. */
191 if (pgno >= mp->npages) {
192 errno = EINVAL;
193 return (NULL);
194 }
195
196 #ifdef STATISTICS
197 ++mp->pageget;
198 #endif
199
200 /* Check for a page that is cached. */
201 if ((bp = mpool_look(mp, pgno)) != NULL) {
202 #ifdef DEBUG
203 if (bp->flags & MPOOL_PINNED) {
204 (void)fprintf(stderr,
205 "mpool_get: page %d already pinned\n", bp->pgno);
206 abort();
207 }
208 #endif
209 /*
210 * Move the page to the head of the hash chain and the tail
211 * of the lru chain.
212 */
213 head = &mp->hqh[HASHKEY(bp->pgno)];
214 CIRCLEQ_REMOVE(head, bp, hq);
215 CIRCLEQ_INSERT_HEAD(head, bp, hq);
216 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
217 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
218
219 /* Return a pinned page. */
220 bp->flags |= MPOOL_PINNED;
221 return (bp->page);
222 }
223
224 /* Get a page from the cache. */
225 if ((bp = mpool_bkt(mp)) == NULL)
226 return (NULL);
227
228 /* Read in the contents. */
229 #ifdef STATISTICS
230 ++mp->pageread;
231 #endif
232 off = mp->pagesize * pgno;
233 #ifdef HAVE_PREAD
234 if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != mp->pagesize) {
235 if (nr >= 0)
236 errno = EFTYPE;
237 return (NULL);
238 }
239 #else
240 if (lseek(mp->fd, off, SEEK_SET) != off)
241 return (NULL);
242 if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
243 if (nr >= 0)
244 errno = EFTYPE;
245 return (NULL);
246 }
247 #endif
248
249 /* Set the page number, pin the page. */
250 bp->pgno = pgno;
251 bp->flags = MPOOL_PINNED;
252
253 /*
254 * Add the page to the head of the hash chain and the tail
255 * of the lru chain.
256 */
257 head = &mp->hqh[HASHKEY(bp->pgno)];
258 CIRCLEQ_INSERT_HEAD(head, bp, hq);
259 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
260
261 /* Run through the user's filter. */
262 if (mp->pgin != NULL)
263 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
264
265 return (bp->page);
266 }
267
268 /**
269 * mpool_put
270 * Return a page.
271 *
272 * @param mp
273 * @param page
274 * @param flags
275 */
276 int
277 mpool_put(mp, page, flags)
278 MPOOL *mp;
279 void *page;
280 u_int flags;
281 {
282 BKT *bp;
283
284 #ifdef STATISTICS
285 ++mp->pageput;
286 #endif
287 bp = (BKT *)((char *)page - sizeof(BKT));
288 #ifdef DEBUG
289 if (!(bp->flags & MPOOL_PINNED)) {
290 (void)fprintf(stderr,
291 "mpool_put: page %d not pinned\n", bp->pgno);
292 abort();
293 }
294 #endif
295 bp->flags &= ~MPOOL_PINNED;
296 bp->flags |= flags & MPOOL_DIRTY;
297 return (RET_SUCCESS);
298 }
299
300 /**
301 * mpool_close
302 * Close the buffer pool.
303 *
304 * @param mp
305 */
306 int
307 mpool_close(mp)
308 MPOOL *mp;
309 {
310 BKT *bp;
311
312 /* Free up any space allocated to the lru pages. */
313 while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
314 CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
315 free(bp);
316 }
317
318 /* Free the MPOOL cookie. */
319 free(mp);
320 return (RET_SUCCESS);
321 }
322
323 /**
324 * mpool_sync
325 * Sync the pool to disk.
326 *
327 * @param mp
328 */
329 int
330 mpool_sync(mp)
331 MPOOL *mp;
332 {
333 BKT *bp;
334
335 /* Walk the lru chain, flushing any dirty pages to disk. */
336 for (bp = mp->lqh.cqh_first;
337 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
338 if (bp->flags & MPOOL_DIRTY &&
339 mpool_write(mp, bp) == RET_ERROR)
340 return (RET_ERROR);
341
342 /* Sync the file descriptor. */
343 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
344 }
345
346 /**
347 * mpool_bkt
348 * Get a page from the cache (or create one).
349 *
350 * @param mp
351 */
352 static BKT *
353 mpool_bkt(mp)
354 MPOOL *mp;
355 {
356 struct _hqh *head;
357 BKT *bp;
358
359 /* If under the max cached, always create a new page. */
360 if (mp->curcache < mp->maxcache)
361 goto new;
362
363 /*
364 * If the cache is max'd out, walk the lru list for a buffer we
365 * can flush. If we find one, write it (if necessary) and take it
366 * off any lists. If we don't find anything we grow the cache anyway.
367 * The cache never shrinks.
368 */
369 for (bp = mp->lqh.cqh_first;
370 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
371 if (!(bp->flags & MPOOL_PINNED)) {
372 /* Flush if dirty. */
373 if (bp->flags & MPOOL_DIRTY &&
374 mpool_write(mp, bp) == RET_ERROR)
375 return (NULL);
376 #ifdef STATISTICS
377 ++mp->pageflush;
378 #endif
379 /* Remove from the hash and lru queues. */
380 head = &mp->hqh[HASHKEY(bp->pgno)];
381 CIRCLEQ_REMOVE(head, bp, hq);
382 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
383 #ifdef DEBUG
384 { void *spage;
385 spage = bp->page;
386 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
387 bp->page = spage;
388 }
389 #endif
390 return (bp);
391 }
392
393 new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
394 return (NULL);
395 #ifdef STATISTICS
396 ++mp->pagealloc;
397 #endif
398 #if defined(DEBUG) || defined(PURIFY)
399 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
400 #endif
401 bp->page = (char *)bp + sizeof(BKT);
402 ++mp->curcache;
403 return (bp);
404 }
405
406 /**
407 * mpool_write
408 * Write a page to disk.
409 *
410 * @param mp
411 * @param bp
412 */
413 static int
414 mpool_write(mp, bp)
415 MPOOL *mp;
416 BKT *bp;
417 {
418 off_t off;
419
420 #ifdef STATISTICS
421 ++mp->pagewrite;
422 #endif
423
424 /* Run through the user's filter. */
425 if (mp->pgout)
426 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
427
428 off = mp->pagesize * bp->pgno;
429 #ifdef HAVE_PWRITE
430 if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize)
431 return (RET_ERROR);
432 #else
433 if (lseek(mp->fd, off, SEEK_SET) != off)
434 return (RET_ERROR);
435 if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
436 return (RET_ERROR);
437 #endif
438
439 bp->flags &= ~MPOOL_DIRTY;
440 return (RET_SUCCESS);
441 }
442
443 /**
444 * mpool_look
445 * Lookup a page in the cache.
446 *
447 * @param mp
448 * @param pgno
449 */
450 static BKT *
451 mpool_look(mp, pgno)
452 MPOOL *mp;
453 pgno_t pgno;
454 {
455 struct _hqh *head;
456 BKT *bp;
457
458 head = &mp->hqh[HASHKEY(pgno)];
459 for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
460 if (bp->pgno == pgno) {
461 #ifdef STATISTICS
462 ++mp->cachehit;
463 #endif
464 return (bp);
465 }
466 #ifdef STATISTICS
467 ++mp->cachemiss;
468 #endif
469 return (NULL);
470 }
471
#ifdef STATISTICS
/**
 * mpool_stat
 *	Print out cache statistics.
 *
 * Writes the pool's size figures and event counters to stderr, followed by
 * the page numbers on the LRU chain in chain order, ten per line.  A page
 * number is suffixed with "d" if the page is dirty and "P" if it is pinned.
 * The hit-rate line is omitted when no lookups have been counted, which
 * also avoids dividing by zero.
 *
 * Every value printed with %lu is cast to unsigned long so the argument
 * matches the conversion whatever the field's declared type is; page
 * numbers are printed with %lu as well instead of the signed %d.
 *
 * @param mp	Pool to report on.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	/* Dump the lru chain; sep carries the separator for the next entry. */
	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Break the line after every tenth page number. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";
	}
	(void)fprintf(stderr, "\n");
}
#endif
/* */