/* */
This source file includes following definitions.
- abbrev_open
- abbrev_close
- abbrev_dump
- compress
- uncompress
1 /*
2 * Copyright (c) 2006
3 * Tama Communications Corporation
4 *
5 * This file is part of GNU GLOBAL.
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <ctype.h>
25 #include <stdio.h>
26 #include <string.h>
27
28 #include "compress.h"
29 #include "die.h"
30 #include "gtagsop.h"
31 #include "strbuf.h"
32 #include "strlimcpy.h"
33 #include "varray.h"
34
35 /**
36 * @file
37 * Compress module
38 *
39 * Function compress() reduces the size of @NAME{GTAGS} by about 10-20% on average.
40 *
41 * @par PROTOCOL:
42 *
43 * @code{.txt}
44 * meta record: " __.COMPRESS ddefine ttypedef"
45 *
46 * 'ddefine' means d => define
47 * 'ttypedef' means t => typedef
48 *
49 * source abbreviation
50 * -----------------------------------------
51 * @ @@
52 * <tag name> @n
53 * "define" @d
54 * "typedef" @t
55 * <spaces> @<digit> or @{<number>}
56 * @endcode
57 *
58 * @par EXAMPLE OF COMPRESS:
59 *
60 * @code
61 * 100 macro 23 #define macro(c) a; b;
62 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
63 * | [compress] ^ [uncompress]
64 * v |
65 * 100 macro 23 #@d @n(c) a;@6b;
66 * ~~~~~~~~~~~~~~~~
67 * @endcode
68 *
69 * @par DATA STRUCTURE
70 * <br>
71 * - #ab2name table is used to convert from abbreviation character
72 * to the string value. <br>
73 * - #name2ab table is used to convert from string value to the
74 * abbreviation character.
75 *
76 * @code
77 * ab2name = ('a' => NULL, ... , 'd' => "define", ... 'z' => NULL)
78 * name2ab = ("define" => 'd', "typedef" => 't')
79 * @endcode
80 */
81 struct abbrmap {
82 int c;
83 char *name;
84 int length;
85 };
86 static struct abbrmap ab2name[26];
87 static VARRAY *name2ab;
88 static char abbrev_string[1024];
89 /**
90 * setup two internal tables for abbreviation.
91 *
92 * @param[in] abbrev abbreviation string
93 */
94 void
95 abbrev_open(const char *abbrev)
96 {
97 int i, limit;
98 struct abbrmap *ab;
99 char *p;
100
101 /*
102 * abbrev string: "ddefine ttypedef"
103 */
104 /* copy abbrev string to static area */
105 strlimcpy(abbrev_string, abbrev, sizeof(abbrev_string));
106 p = abbrev_string;
107
108 /* initialize ab2name table */
109 limit = sizeof(ab2name) / sizeof(struct abbrmap);
110 for (i = 0; i < limit; i++) {
111 ab2name[i].c = 0;
112 ab2name[i].name = NULL;
113 }
114 name2ab = varray_open(sizeof(struct abbrmap), 5);
115 while (*p) {
116 ab = (struct abbrmap *)varray_append(name2ab);
117 ab->c = *p++;
118 ab->name = p;
119 for (; *p && *p != ' '; p++)
120 ;
121 if (*p == ' ')
122 *p++ = '\0';
123 ab->length = strlen(ab->name);
124 if (ab->c < 'a' || ab->c > 'z')
125 die("Abbrev character must be a lower alphabetic character. (%s)", abbrev);
126 i = ab->c - 'a';
127 ab2name[i].c = ab->c;
128 ab2name[i].name = ab->name;
129 ab2name[i].length = ab->length;
130 }
131 }
132 /**
133 * free allocated memory.
134 */
135 void
136 abbrev_close(void)
137 {
138 if (name2ab)
139 varray_close(name2ab);
140 name2ab = NULL;
141 }
142 /**
143 * @remark for debugging.
144 */
145 void
146 abbrev_dump(void)
147 {
148 struct abbrmap *ab;
149 int i, limit = sizeof(ab2name) / sizeof(struct abbrmap);
150
151 if (!name2ab) {
152 fprintf(stderr, "name2ab is NULL.\n");
153 return;
154 }
155 fprintf(stderr, "ab2name: %d entries\n", limit);
156 for (i = 0; i < limit; i++) {
157 if (ab2name[i].c != 0) {
158 fprintf(stderr, "ab2name[%d].c = %c\n", i, ab2name[i].c);
159 fprintf(stderr, "ab2name[%d].name = %s\n", i, ab2name[i].name);
160 }
161 }
162 ab = (struct abbrmap *)varray_assign(name2ab, 0, 0);
163 limit = name2ab->length;
164 fprintf(stderr, "name2ab: %d entries\n", limit);
165 for (i = 0; i < limit; i++) {
166 if (ab[i].c != 0) {
167 fprintf(stderr, "name2ab[%d].c = %c\n", i, ab[i].c);
168 fprintf(stderr, "name2ab[%d].name = %s\n", i, ab[i].name);
169 }
170 }
171 }
172 /**
173 * compress source line.
174 *
175 * @param[in] in source line
176 * @param[in] name replaced string
177 * @return compressed string
178 */
179 char *
180 compress(const char *in, const char *name)
181 {
182 STATIC_STRBUF(sb);
183 const char *p = in;
184 int length = strlen(name);
185 int spaces = 0;
186
187 strbuf_clear(sb);
188 while (*p) {
189 if (*p == ' ') {
190 spaces++;
191 p++;
192 continue;
193 }
194 if (spaces > 0) {
195 if (spaces >= 10) {
196 strbuf_putc(sb, '@');
197 strbuf_putc(sb, '{');
198 strbuf_putn(sb, spaces);
199 strbuf_putc(sb, '}');
200 } else if (spaces > 3) {
201 strbuf_putc(sb, '@');
202 strbuf_putn(sb, spaces);
203 } else {
204 strbuf_nputc(sb, ' ', spaces);
205 }
206 }
207 spaces = 0;
208 if (*p == '@') {
209 strbuf_puts(sb, "@@");
210 p++;
211 } else if (!strncmp(p, name, length)) {
212 strbuf_puts(sb, "@n");
213 p += length;
214 } else if (name2ab) {
215 int i, limit = name2ab->length;
216 struct abbrmap *ab = (struct abbrmap *)varray_assign(name2ab, 0, 0);
217
218 for (i = 0; i < limit; i++) {
219 if (!strncmp(p, ab[i].name, ab[i].length)) {
220 strbuf_putc(sb, '@');
221 strbuf_putc(sb, ab[i].c);
222 p += ab[i].length;
223 break;
224 }
225 }
226 if (i >= limit) {
227 strbuf_putc(sb, *p);
228 p++;
229 }
230 } else {
231 strbuf_putc(sb, *p);
232 p++;
233 }
234 }
235 if (spaces > 0) {
236 if (spaces < 4) {
237 strbuf_nputc(sb, ' ', spaces);
238 } else if (spaces < 10) {
239 strbuf_putc(sb, '@');
240 strbuf_putn(sb, spaces);
241 } else {
242 strbuf_putc(sb, '@');
243 strbuf_putc(sb, '{');
244 strbuf_putn(sb, spaces);
245 strbuf_putc(sb, '}');
246 }
247 }
248 return strbuf_value(sb);
249 }
250
251 /**
252 * uncompress source line.
253 *
254 * @param[in] in compressed string
255 * @param[in] name replaced string
256 * @return uncompressed string
257 */
258 char *
259 uncompress(const char *in, const char *name)
260 {
261 STATIC_STRBUF(sb);
262 const char *p;
263 int i;
264
265 strbuf_clear(sb);
266 for (p = in; *p; p++) {
267 if (*p == '@') {
268 int spaces = 0;
269
270 switch (*++p) {
271 case '@':
272 strbuf_putc(sb, '@');
273 break;
274 case 'n':
275 strbuf_puts(sb, name);
276 break;
277 case '{': /* } */
278 for (p++; *p && isdigit((unsigned char)*p); p++)
279 spaces = spaces * 10 + *p - '0';
280 break;
281 case '0':
282 case '1':
283 case '2':
284 case '3':
285 case '4':
286 case '5':
287 case '6':
288 case '7':
289 case '8':
290 case '9':
291 spaces = *p - '0';
292 break;
293 default:
294 if (*p < 'a' || *p > 'z')
295 die("Abbrev character must be a lower alphabetic character. (%c)", *p);
296 i = *p - 'a';
297 if (ab2name[i].name)
298 strbuf_puts(sb, ab2name[i].name);
299 break;
300 }
301 strbuf_nputc(sb, ' ', spaces);
302 } else {
303 strbuf_putc(sb, *p);
304 }
305 }
306 return strbuf_value(sb);
307 }
/* */