libutil/compress.c

libutil/compress.c
/* */
This source file includes the following definitions.
abbrev_open
abbrev_close
abbrev_dump
compress
uncompress
   1 /*
   2  * Copyright (c) 2006
   3  *      Tama Communications Corporation
   4  *
   5  * This file is part of GNU GLOBAL.
   6  *
   7  * This program is free software: you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation, either version 3 of the License, or
  10  * (at your option) any later version.
  11  * 
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  * 
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19  */
  20 
  21 #ifdef HAVE_CONFIG_H
  22 #include <config.h>
  23 #endif
  24 #include <ctype.h>
  25 #include <stdio.h>
  26 #include <string.h>
  27 
  28 #include "compress.h"
  29 #include "die.h"
  30 #include "gtagsop.h"
  31 #include "strbuf.h"
  32 #include "strlimcpy.h"
  33 #include "varray.h"
  34 
  35 /**
  36  * @file
  37  * Compress module
  38  *
  39  * Function compress() reduces the size of @NAME{GTAGS} by about 10-20% average.
  40  *
  41  * @par PROTOCOL:
  42  *
  43  * @code{.txt}
  44  *      meta record: " __.COMPRESS ddefine ttypedef"
  45  *
  46  *      'ddefine' means d => define
  47  *      'ttypedef' means t => typedef
  48  *
  49  *      source          abbreviation
  50  *      -----------------------------------------
  51  *      @               @@
  52  *      <tag name>      @n
  53  *      "define"        @d
  54  *      "typedef"       @t
  55  *      <spaces>        @<digit> or @{<number>}
  56  * @endcode
  57  *
  58  * @par EXAMPLE OF COMPRESS:
  59  *
  60  * @code
  61  *      100 macro 23 #define macro(c) a;      b;
  62  *                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  63  *                   | [compress]   ^ [uncompress]
  64  *                   v              |
  65  *      100 macro 23 #@d @n(c) a;@6b;
  66  *                   ~~~~~~~~~~~~~~~~
  67  * @endcode
  68  *
  69  * @par DATA STRUCTURE
  70  * <br>
  71  *      - #ab2name table is used to convert from abbreviation character
  72  *        to the string value. <br>
  73  *      - #name2ab table is used to convert from string value to the
  74  *        abbreviation character.
  75  *        
  76  *      @code
  77  *      ab2name = ('a' => NULL, ... , 'd' => "define", ... 'z' => NULL)
  78  *      name2ab = ("define" => 'd', "typedef" => 't')
  79  *      @endcode
  80  */
  81 struct abbrmap {                    /* one abbreviation: a letter paired with the string it stands for */
  82         int c;                      /* abbreviation character ('a'..'z'); 0 marks an unused ab2name slot */
  83         char *name;                 /* expanded string; points into abbrev_string */
  84         int length;                 /* strlen(name) */
  85 };
  86 static struct abbrmap ab2name[26];  /* indexed by (c - 'a'): abbreviation character -> string */
  87 static VARRAY *name2ab;             /* entries in the order parsed by abbrev_open(); NULL while closed */
  88 static char abbrev_string[1024];    /* private copy of the abbrev string, split in place by abbrev_open() */
  89 /**
  90  * setup two internal tables for abbreviation.
  91  *
  92  *      @param[in]      abbrev  abbreviation string
  93  */
  94 void
  95 abbrev_open(const char *abbrev)
  96 {
  97         int i, limit;
  98         struct abbrmap *ab;
  99         char *p;
 100 
 101         /*
 102          * abbrev string: "ddefine ttypedef"
 103          */
 104         /* copy abbrev string to static area */
 105         strlimcpy(abbrev_string, abbrev, sizeof(abbrev_string));
 106         p = abbrev_string;
 107 
 108         /* initialize ab2name table */
 109         limit = sizeof(ab2name) / sizeof(struct abbrmap);
 110         for (i = 0; i < limit; i++) {
 111                 ab2name[i].c = 0;
 112                 ab2name[i].name = NULL;
 113         }
 114         name2ab = varray_open(sizeof(struct abbrmap), 5);
 115         while (*p) {
 116                 ab = (struct abbrmap *)varray_append(name2ab);
 117                 ab->c = *p++;
 118                 ab->name = p;
 119                 for (; *p && *p != ' '; p++)
 120                         ;
 121                 if (*p == ' ')
 122                         *p++ = '\0';
 123                 ab->length = strlen(ab->name);
 124                 if (ab->c < 'a' || ab->c > 'z')
 125                         die("Abbrev character must be a lower alphabetic character. (%s)", abbrev);
 126                 i = ab->c - 'a';
 127                 ab2name[i].c = ab->c;
 128                 ab2name[i].name = ab->name;
 129                 ab2name[i].length = ab->length;
 130         }
 131 }
 132 /**
 133  * free allocated memory.
 134  */
 135 void
 136 abbrev_close(void)
 137 {
 138         if (name2ab)
 139                 varray_close(name2ab);
 140         name2ab = NULL;
 141 }
 142 /**
 143  * @remark for debugging.
 144  */
 145 void
 146 abbrev_dump(void)
 147 {
 148         struct abbrmap *ab;
 149         int i, limit = sizeof(ab2name) / sizeof(struct abbrmap);
 150 
 151         if (!name2ab) {
 152                 fprintf(stderr, "name2ab is NULL.\n");
 153                 return;
 154         }
 155         fprintf(stderr, "ab2name: %d entries\n", limit);
 156         for (i = 0; i < limit; i++) {
 157                 if (ab2name[i].c != 0) {
 158                         fprintf(stderr, "ab2name[%d].c    = %c\n", i, ab2name[i].c);
 159                         fprintf(stderr, "ab2name[%d].name = %s\n", i, ab2name[i].name);
 160                 }
 161         }
 162         ab = (struct abbrmap *)varray_assign(name2ab, 0, 0);
 163         limit = name2ab->length;
 164         fprintf(stderr, "name2ab: %d entries\n", limit);
 165         for (i = 0; i < limit; i++) {
 166                 if (ab[i].c != 0) {
 167                         fprintf(stderr, "name2ab[%d].c    = %c\n", i, ab[i].c);
 168                         fprintf(stderr, "name2ab[%d].name = %s\n", i, ab[i].name);
 169                 }
 170         }
 171 }
 172 /**
 173  * compress source line.
 174  *
 175  *      @param[in]      in      source line
 176  *      @param[in]      name    replaced string
 177  *      @return         compressed string
 178  */
 179 char *
 180 compress(const char *in, const char *name)
 181 {
 182         STATIC_STRBUF(sb);
 183         const char *p = in;
 184         int length = strlen(name);
 185         int spaces = 0;
 186 
 187         strbuf_clear(sb);
 188         while (*p) {
 189                 if (*p == ' ') {
 190                         spaces++;
 191                         p++;
 192                         continue;
 193                 }
 194                 if (spaces > 0) {
 195                         if (spaces >= 10) {
 196                                 strbuf_putc(sb, '@');
 197                                 strbuf_putc(sb, '{');
 198                                 strbuf_putn(sb, spaces);
 199                                 strbuf_putc(sb, '}');
 200                         } else if (spaces > 3) {
 201                                 strbuf_putc(sb, '@');
 202                                 strbuf_putn(sb, spaces);
 203                         } else {
 204                                 strbuf_nputc(sb, ' ', spaces);
 205                         }
 206                 }
 207                 spaces = 0;
 208                 if (*p == '@') {
 209                         strbuf_puts(sb, "@@");
 210                         p++;
 211                 } else if (!strncmp(p, name, length)) {
 212                         strbuf_puts(sb, "@n");
 213                         p += length;
 214                 } else if (name2ab) {
 215                         int i, limit = name2ab->length;
 216                         struct abbrmap *ab = (struct abbrmap *)varray_assign(name2ab, 0, 0);
 217 
 218                         for (i = 0; i < limit; i++) {
 219                                 if (!strncmp(p, ab[i].name, ab[i].length)) {
 220                                         strbuf_putc(sb, '@');
 221                                         strbuf_putc(sb, ab[i].c);
 222                                         p += ab[i].length;
 223                                         break;
 224                                 }
 225                         }
 226                         if (i >= limit) {
 227                                 strbuf_putc(sb, *p);
 228                                 p++;
 229                         }
 230                 } else {
 231                         strbuf_putc(sb, *p);
 232                         p++;
 233                 }
 234         }
 235         if (spaces > 0) {
 236                 if (spaces < 4) {
 237                         strbuf_nputc(sb, ' ', spaces);
 238                 } else if (spaces < 10) {
 239                         strbuf_putc(sb, '@');
 240                         strbuf_putn(sb, spaces);
 241                 } else {
 242                         strbuf_putc(sb, '@');
 243                         strbuf_putc(sb, '{');
 244                         strbuf_putn(sb, spaces);
 245                         strbuf_putc(sb, '}');
 246                 }
 247         }
 248         return strbuf_value(sb);
 249 }
 250 
 251 /**
 252  * uncompress source line.
 253  *
 254  *      @param[in]      in      compressed string
 255  *      @param[in]      name    replaced string
 256  *      @return         uncompressed string
 257  */
 258 char *
 259 uncompress(const char *in, const char *name)
 260 {
 261         STATIC_STRBUF(sb);
 262         const char *p;
 263         int i;
 264 
 265         strbuf_clear(sb);
 266         for (p = in;  *p; p++) {
 267                 if (*p == '@') {
 268                         int spaces = 0;
 269 
 270                         switch (*++p) {
 271                         case '@':
 272                                 strbuf_putc(sb, '@');
 273                                 break;
 274                         case 'n':
 275                                 strbuf_puts(sb, name);
 276                                 break;
 277                         case '{':       /* } */
 278                                 for (p++; *p && isdigit((unsigned char)*p); p++)
 279                                         spaces = spaces * 10 + *p - '0';
 280                                 break;
 281                         case '0':
 282                         case '1':
 283                         case '2':
 284                         case '3':
 285                         case '4':
 286                         case '5':
 287                         case '6':
 288                         case '7':
 289                         case '8':
 290                         case '9':
 291                                 spaces = *p - '0';
 292                                 break;
 293                         default:
 294                                 if (*p < 'a' || *p > 'z')
 295                                         die("Abbrev character must be a lower alphabetic character. (%c)", *p);
 296                                 i = *p - 'a';
 297                                 if (ab2name[i].name)
 298                                         strbuf_puts(sb, ab2name[i].name);
 299                                 break;
 300                         }
 301                         strbuf_nputc(sb, ' ', spaces);
 302                 } else {
 303                         strbuf_putc(sb, *p);
 304                 }
 305         }
 306         return strbuf_value(sb);
 307 }
/* */
root/libutil/compress.c

DEFINITIONS