/* */
This source file includes the following definitions.
- trim
- prepare_source
- prepare_skip
- issourcefile
- skipthisfile
- getrealpath
- has_symlinkloop
- getdirs
- set_accept_dotfiles
- find_open
- find_open_filelist
- find_read
- find_read_traverse
- find_read_filelist
- find_close
1 /*
2 * Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2005, 2006, 2008,
3 * 2009, 2011, 2012 Tama Communications Corporation
4 *
5 * This file is part of GNU GLOBAL.
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <assert.h>
25 #include <ctype.h>
26 #ifdef HAVE_DIRENT_H
27 #include <sys/types.h>
28 #include <dirent.h>
29 #include <sys/stat.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #include <stdio.h>
35 #ifdef STDC_HEADERS
36 #include <stdlib.h>
37 #endif
38 #ifdef HAVE_STRING_H
39 #include <string.h>
40 #else
41 #include <strings.h>
42 #endif
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46
47 #include "gparam.h"
48 #include "regex.h"
49
50 #include "abs2rel.h"
51 #include "char.h"
52 #include "checkalloc.h"
53 #include "conf.h"
54 #include "die.h"
55 #include "find.h"
56 #include "getdbpath.h"
57 #include "is_unixy.h"
58 #include "langmap.h"
59 #include "locatestring.h"
60 #include "makepath.h"
61 #include "path.h"
62 #include "strbuf.h"
63 #include "strlimcpy.h"
64 #include "test.h"
65 #include "varray.h"
66
/*
 * Select the string comparison functions that suit the file system and
 * define ROOT, the position of the root slash in a path name.
 * On _WIN32 and DJGPP the stricmp() family is used and ROOT is 2;
 * everywhere else the plain strcmp() family is used and ROOT is 0.
 */
#if !defined(_WIN32) && !defined(__DJGPP__)
#define STRCMP	strcmp
#define STRNCMP	strncmp
#define ROOT	0
#else
#define STRCMP	stricmp
#define STRNCMP	strnicmp
#define ROOT	2
#endif
79
80 /**
81 * @file
82 * usage of find_xxx()
83 *
84 * @code
85 * find_open(NULL);
86 * while (path = find_read()) {
87 * ...
88 * }
89 * find_close();
90 * @endcode
91 *
92 */
93 static regex_t *skip; /**< regex for skipping units */
94 static regex_t *suff; /**< regex for suffixes */
95 static STRBUF *list;
96 static int list_count;
97 static char **listarray; /**< list for skipping full path */
98 static FILE *ip;
99 static FILE *temp;
100 static char rootdir[PATH_MAX];
101 static char cwddir[MAXPATHLEN];
102 static int find_mode;
103 static int find_eof;
104 #define FIND_OPEN 1
105 #define FILELIST_OPEN 2
106
107 static void trim(char *);
108 static char *find_read_traverse(void);
109 static char *find_read_filelist(void);
110
111 extern int qflag;
112 #ifdef DEBUG
113 extern int debug;
114 #endif
115 static const int allow_blank = 1;
116 static const int check_looplink = 1;
117 static int accept_dotfiles = 0;
/**
 * trim: remove white space and unescape @CODE{'\\'} in place.
 *
 * Every white space character is dropped. A backslash which is followed
 * by another character is dropped and that character is kept as it is,
 * even if it is white space. A backslash at the end of the string is kept.
 *
 *	@param[in,out]	s	string to be rewritten
 */
static void
trim(char *s)
{
	const char *in = s;
	char *out = s;

	while (*in != '\0') {
		if (!isspace((unsigned char)*in)) {
			/* step over the escape character, keep what follows */
			if (in[0] == '\\' && in[1] != '\0')
				in++;
			*out++ = *in;
		}
		in++;
	}
	*out = '\0';
}
135 /**
136 * prepare_source: preparing regular expression.
137 *
138 * <!-- @param[in] flags flags for regcomp. -->
139 * @return compiled regular expression for source files.
140 */
141 static regex_t *
142 prepare_source(void)
143 {
144 static regex_t suff_area;
145 STRBUF *sb = strbuf_open(0);
146 char *sufflist = NULL;
147 char *langmap = NULL;
148 int flags = REG_EXTENDED;
149
150 /*
151 * load icase_path option.
152 */
153 if (getconfb("icase_path"))
154 flags |= REG_ICASE;
155 #if defined(_WIN32) || defined(__DJGPP__)
156 flags |= REG_ICASE;
157 #endif
158 /*
159 * make suffix list.
160 */
161 strbuf_reset(sb);
162 if (getconfs("langmap", sb)) {
163 langmap = check_strdup(strbuf_value(sb));
164 }
165 strbuf_reset(sb);
166 make_suffixes(langmap ? langmap : DEFAULTLANGMAP, sb);
167 sufflist = check_strdup(strbuf_value(sb));
168 trim(sufflist);
169 {
170 const char *suffp;
171
172 strbuf_reset(sb);
173 strbuf_puts(sb, "\\.("); /* ) */
174 for (suffp = sufflist; suffp; ) {
175 const char *p;
176
177 for (p = suffp; *p && *p != ','; p++) {
178 if (!isalnum((unsigned char)*p))
179 strbuf_putc(sb, '\\');
180 strbuf_putc(sb, *p);
181 }
182 if (!*p)
183 break;
184 assert(*p == ',');
185 strbuf_putc(sb, '|');
186 suffp = ++p;
187 }
188 strbuf_puts(sb, ")$");
189 /*
190 * compile regular expression.
191 */
192 if (regcomp(&suff_area, strbuf_value(sb), flags) != 0)
193 die("cannot compile regular expression.");
194 }
195 strbuf_close(sb);
196 if (langmap)
197 free(langmap);
198 if (sufflist)
199 free(sufflist);
200 return &suff_area;
201 }
202 /**
203 * prepare_skip: prepare skipping files.
204 *
205 * @par Globals used (output):
206 * #listarray[]: skip list. <br>
207 * #list_count: count of skip list.
208 *
209 * @return compiled regular expression for skip files.
210 */
211 static regex_t *
212 prepare_skip(void)
213 {
214 static regex_t skip_area;
215 char *skiplist;
216 STRBUF *reg = strbuf_open(0);
217 char *p, *q;
218 int flags = REG_EXTENDED|REG_NEWLINE;
219
220 /*
221 * load icase_path option.
222 */
223 if (getconfb("icase_path"))
224 flags |= REG_ICASE;
225 #if defined(_WIN32) || defined(__DJGPP__)
226 flags |= REG_ICASE;
227 #endif
228 /*
229 * initialize common data.
230 */
231 if (!list)
232 list = strbuf_open(0);
233 else
234 strbuf_reset(list);
235 list_count = 0;
236 if (listarray)
237 (void)free(listarray);
238 listarray = (char **)0;
239 /*
240 * load skip data.
241 */
242 if (!getconfs("skip", reg)) {
243 strbuf_close(reg);
244 return NULL;
245 }
246 skiplist = check_strdup(strbuf_value(reg));
247 trim(skiplist);
248 strbuf_reset(reg);
249 /*
250 * construct regular expression.
251 */
252 strbuf_putc(reg, '('); /* ) */
253 /*
254 * Hard coded skip files:
255 * (1) files which start with '.'
256 * (2) tag files
257 */
258 /* skip files which start with '.' e.g. .cvsignore */
259 if (!accept_dotfiles) {
260 strbuf_puts(reg, "/\\.[^/]+$|");
261 strbuf_puts(reg, "/\\.[^/]+/|");
262 }
263 /* skip tag files */
264 strbuf_puts(reg, "/GTAGS$|");
265 strbuf_puts(reg, "/GRTAGS$|");
266 strbuf_puts(reg, "/GSYMS$|");
267 strbuf_puts(reg, "/GPATH$|");
268 for (p = skiplist; p; ) {
269 char *skipf = p;
270 if ((p = locatestring(p, ",", MATCH_FIRST)) != NULL)
271 *p++ = 0;
272 if (*skipf == '/') {
273 list_count++;
274 strbuf_puts0(list, skipf);
275 } else {
276 strbuf_putc(reg, '/');
277 for (q = skipf; *q; q++) {
278 if (isregexchar(*q))
279 strbuf_putc(reg, '\\');
280 strbuf_putc(reg, *q);
281 }
282 if (*(q - 1) != '/')
283 strbuf_putc(reg, '$');
284 if (p)
285 strbuf_putc(reg, '|');
286 }
287 }
288 strbuf_unputc(reg, '|');
289 strbuf_putc(reg, ')');
290 /*
291 * compile regular expression.
292 */
293 if (regcomp(&skip_area, strbuf_value(reg), flags) != 0)
294 die("cannot compile regular expression.");
295 if (list_count > 0) {
296 int i;
297 listarray = (char **)check_malloc(sizeof(char *) * list_count);
298 p = strbuf_value(list);
299 for (i = 0; i < list_count; i++) {
300 listarray[i] = p;
301 p += strlen(p) + 1;
302 }
303 }
304 strbuf_close(reg);
305 free(skiplist);
306
307 return &skip_area;
308 }
309 /**
310 * issourcefile: check whether or not a source file.
311 *
312 * @param[in] path path name (@STRONG{must} start with @FILE{./})
313 * @return 1: source file, 0: other file
314 */
315 int
316 issourcefile(const char *path)
317 {
318 if (suff == NULL) {
319 suff = prepare_source(); /* XXX this cannot return NULL */
320 if (suff == NULL)
321 die("prepare_source failed.");
322 }
323 if (regexec(suff, path, 0, 0, 0) == 0)
324 return 1;
325 return 0;
326 }
327 /**
328 * skipthisfile: check whether or not we accept this file.
329 *
330 * @param[in] path path name (@STRONG{must} start with @FILE{./})
331 * @return 1: skip, 0: don't skip
332 *
333 * @attention
334 * @STRONG{Specification of required path name:}
335 * - Path @STRONG{must} start with @FILE{./}.
336 * - Directory path name @STRONG{must} end with @FILE{/}.
337 */
338 int
339 skipthisfile(const char *path)
340 {
341 const char *first, *last;
342 int i;
343
344 /*
345 * unit check.
346 */
347 if (skip == NULL) {
348 skip = prepare_skip();
349 if (skip == NULL)
350 die("prepare_skip failed.");
351 }
352 if (regexec(skip, path, 0, 0, 0) == 0)
353 return 1;
354 /*
355 * list check.
356 */
357 if (list_count == 0)
358 return 0;
359 for (i = 0; i < list_count; i++) {
360 first = listarray[i];
361 last = first + strlen(first);
362 /*
363 * the path must start with "./".
364 */
365 if (*(last - 1) == '/') { /* it's a directory */
366 if (!STRNCMP(path + 1, first, last - first)) {
367 return 1;
368 }
369 } else {
370 if (!STRCMP(path + 1, first)) {
371 return 1;
372 }
373 }
374 }
375 return 0;
376 }
377
378 /**
379 * @name Directory Stack
380 */
381 /** @{ */
382 static char dir[MAXPATHLEN]; /**< directory path */
383 static VARRAY *stack; /**< dynamic allocated array */
384 struct stack_entry {
385 STRBUF *sb;
386 char *real;
387 char *dirp, *start, *end, *p;
388 };
389 static int current_entry; /**< current entry of the stack */
390 /** @} */
391
392 /**
393 * getrealpath: return a real path of dir using allocated area.
394 */
395 char *
396 getrealpath(const char *dir)
397 {
398 char real[PATH_MAX];
399
400 if (realpath(dir, real) == NULL)
401 die("cannot get real path of '%s'.", trimpath(dir));
402 return check_strdup(real);
403 }
404 /**
405 * has_symlinkloop: whether or not @a dir has a symbolic link loops.
406 *
407 * @param[in] dir directory (@STRONG{should} end by @FILE{/})
408 * @return 1: has a loop, 0: don't have a loop
409 */
410 int
411 has_symlinkloop(const char *dir)
412 {
413 struct stack_entry *sp;
414 char real[PATH_MAX], *p;
415 int i;
416
417 if (!strcmp(dir, "./"))
418 return 0;
419 if (realpath(dir, real) == NULL)
420 die("cannot get real path of '%s'.", trimpath(dir));
421 #ifdef SLOOPDEBUG
422 fprintf(stderr, "======== has_symlinkloop ======\n");
423 fprintf(stderr, "dir = '%s', real path = '%s'\n", dir, real);
424 fprintf(stderr, "TEST-1\n");
425 fprintf(stderr, "\tcheck '%s' < '%s'\n", real, rootdir);
426 #endif
427 p = locatestring(rootdir, real, MATCH_AT_FIRST);
428 if (p && (*p == '/' || *p == '\0' || !strcmp(real, "/")))
429 return 1;
430 sp = varray_assign(stack, 0, 0);
431 #ifdef SLOOPDEBUG
432 fprintf(stderr, "TEST-2\n");
433 #endif
434 for (i = current_entry; i >= 0; i--) {
435 #ifdef SLOOPDEBUG
436 fprintf(stderr, "%d:\tcheck '%s' == '%s'\n", i, real, sp[i].real);
437 #endif
438 if (!strcmp(sp[i].real, real))
439 return 1;
440 }
441 #ifdef SLOOPDEBUG
442 fprintf(stderr, "===============================\n");
443 #endif
444 return 0;
445 }
446
447 /**
448 * getdirs: get directory list
449 *
450 * @param[in] dir directory (@STRONG{should} end by @FILE{/})
451 * @param[out] sb string buffer
452 * @return -1: error, 0: normal
453 *
454 * @par format of directory list:
455 * @code
456 * |ddir1\0ffile1\0|
457 * @endcode
458 * means directory @FILE{dir1}, file @FILE{file1}.
459 */
460 static int
461 getdirs(const char *dir, STRBUF *sb)
462 {
463 DIR *dirp;
464 struct dirent *dp;
465 struct stat st;
466
467 if (check_looplink && has_symlinkloop(dir)) {
468 warning("symbolic link loop detected. '%s' is ignored.", trimpath(dir));
469 return -1;
470 }
471 if ((dirp = opendir(dir)) == NULL) {
472 warning("cannot open directory '%s'. ignored.", trimpath(dir));
473 return -1;
474 }
475 while ((dp = readdir(dirp)) != NULL) {
476 if (!strcmp(dp->d_name, "."))
477 continue;
478 if (!strcmp(dp->d_name, ".."))
479 continue;
480 if (stat(makepath(dir, dp->d_name, NULL), &st) < 0) {
481 warning("cannot stat '%s'. ignored.", trimpath(dp->d_name));
482 continue;
483 }
484 if (S_ISDIR(st.st_mode))
485 strbuf_putc(sb, 'd');
486 else if (S_ISREG(st.st_mode))
487 strbuf_putc(sb, 'f');
488 else
489 strbuf_putc(sb, ' ');
490 strbuf_puts(sb, dp->d_name);
491 strbuf_putc(sb, '\0');
492 }
493 (void)closedir(dirp);
494 return 0;
495 }
496 /**
497 * set_accept_dotfiles: make find to accept dot files and dot directries.
498 */
499 void
500 set_accept_dotfiles(void)
501 {
502 accept_dotfiles = 1;
503 }
504 /**
505 * find_open: start iterator without @VAR{GPATH}.
506 *
507 * @param[in] start start directory <br>
508 * If @VAR{NULL}, assumed @CODE{\".\"} directory.
509 */
510 void
511 find_open(const char *start)
512 {
513 struct stack_entry *curp;
514 assert(find_mode == 0);
515 find_mode = FIND_OPEN;
516
517 if (!start)
518 start = "./";
519 if (realpath(start, rootdir) == NULL)
520 die("cannot get real path of '%s'.", trimpath(dir));
521 /*
522 * setup stack.
523 */
524 stack = varray_open(sizeof(struct stack_entry), 50);
525 current_entry = 0;
526 curp = varray_assign(stack, current_entry, 1);
527 strlimcpy(dir, start, sizeof(dir));
528 curp->dirp = dir + strlen(dir);
529 curp->sb = strbuf_open(0);
530 curp->real = getrealpath(dir);
531 if (getdirs(dir, curp->sb) < 0)
532 die("Work is given up.");
533 curp->start = curp->p = strbuf_value(curp->sb);
534 curp->end = curp->start + strbuf_getlen(curp->sb);
535 strlimcpy(cwddir, get_root(), sizeof(cwddir));
536 }
537 /**
538 * find_open_filelist: find_open like interface for handling output of @XREF{find,1}.
539 *
540 * @param[in] filename file including list of file names. <br>
541 * When @FILE{-} is specified, read from standard input.
542 * @param[in] root root directory of source tree
543 */
544 void
545 find_open_filelist(const char *filename, const char *root)
546 {
547 assert(find_mode == 0);
548 find_mode = FILELIST_OPEN;
549
550 if (!strcmp(filename, "-")) {
551 /*
552 * If the filename is '-', copy standard input onto
553 * temporary file to be able to read repeatedly.
554 */
555 if (temp == NULL) {
556 char buf[MAXPATHLEN];
557
558 temp = tmpfile();
559 while (fgets(buf, sizeof(buf), stdin) != NULL)
560 fputs(buf, temp);
561 }
562 rewind(temp);
563 ip = temp;
564 } else {
565 ip = fopen(filename, "r");
566 if (ip == NULL)
567 die("cannot open '%s'.", trimpath(filename));
568 }
569 /*
570 * rootdir always ends with '/'.
571 */
572 if (!strcmp(root+ROOT, "/"))
573 strlimcpy(rootdir, root, sizeof(rootdir));
574 else
575 snprintf(rootdir, sizeof(rootdir), "%s/", root);
576 strlimcpy(cwddir, root, sizeof(cwddir));
577 }
578 /**
579 * find_read: read path without @VAR{GPATH}.
580 *
581 * @return path
582 */
583 char *
584 find_read(void)
585 {
586 static char *path;
587
588 assert(find_mode != 0);
589 if (find_eof)
590 path = NULL;
591 else if (find_mode == FILELIST_OPEN)
592 path = find_read_filelist();
593 else if (find_mode == FIND_OPEN)
594 path = find_read_traverse();
595 else
596 die("find_read: internal error.");
597 return path;
598 }
599 /**
600 * find_read_traverse: read path without @VAR{GPATH}.
601 *
602 * @return path
603 */
604 char *
605 find_read_traverse(void)
606 {
607 static char val[MAXPATHLEN];
608 char path[MAXPATHLEN];
609 struct stack_entry *curp = varray_assign(stack, current_entry, 1);
610
611 for (;;) {
612 while (curp->p < curp->end) {
613 char type = *(curp->p);
614 const char *unit = curp->p + 1;
615
616 curp->p += strlen(curp->p) + 1;
617
618 /*
619 * Skip files described in the skip list.
620 */
621 /* makepath() returns unsafe module local area. */
622 strlimcpy(path, makepath(dir, unit, NULL), sizeof(path));
623 if (type == 'd')
624 strcat(path, "/");
625 if (skipthisfile(path))
626 continue;
627 if (type == 'f') {
628 /*
629 * Skip the following:
630 * o directory
631 * o file which does not exist
632 * o dead symbolic link
633 */
634 if (!test("f", path)) {
635 if (test("d", path))
636 warning("'%s' is a directory. ignored.", trimpath(path));
637 else
638 warning("'%s' not found. ignored.", trimpath(path));
639 continue;
640 }
641 /*
642 * Now GLOBAL can treat the path which includes blanks.
643 * This message is obsoleted.
644 */
645 if (!allow_blank && locatestring(path, " ", MATCH_FIRST)) {
646 warning("'%s' ignored, because it includes blank.", trimpath(path));
647 continue;
648 }
649 /*
650 * A blank at the head of path means
651 * other than source file.
652 */
653 if (issourcefile(path)) {
654 strlimcpy(val, path, sizeof(val));
655 } else {
656 /* other file like 'Makefile' */
657 val[0] = ' ';
658 strlimcpy(&val[1], path, sizeof(val) - 1);
659 }
660 val[sizeof(val) - 1] = '\0';
661 return val;
662 }
663 if (type == 'd') {
664 STRBUF *sb = strbuf_open(0);
665 char *dirp = curp->dirp;
666 strcat(dirp, unit);
667 strcat(dirp, "/");
668 if (getdirs(dir, sb) < 0) {
669 strbuf_close(sb);
670 *(curp->dirp) = 0;
671 continue;
672 }
673 /*
674 * Push stack.
675 */
676 curp = varray_assign(stack, ++current_entry, 1);
677 curp->dirp = dirp + strlen(dirp);
678 curp->real = getrealpath(dir);
679 curp->sb = sb;
680 curp->start = curp->p = strbuf_value(sb);
681 curp->end = curp->start + strbuf_getlen(sb);
682 }
683 }
684 strbuf_close(curp->sb);
685 curp->sb = NULL;
686 free(curp->real);
687 curp->real = NULL;
688 if (current_entry == 0)
689 break;
690 /*
691 * Pop stack.
692 */
693 curp = varray_assign(stack, --current_entry, 0);
694 *(curp->dirp) = 0;
695 }
696 find_eof = 1;
697 return NULL;
698 }
699 /**
700 * find_read_filelist: read path from file
701 *
702 * @return path
703 */
704 static char *
705 find_read_filelist(void)
706 {
707 STATIC_STRBUF(ib);
708 static char buf[MAXPATHLEN + 1];
709 static char *path;
710
711 strbuf_clear(ib);
712 for (;;) {
713 path = strbuf_fgets(ib, ip, STRBUF_NOCRLF);
714 if (path == NULL) {
715 /* EOF */
716 find_eof = 1;
717 return NULL;
718 }
719 if (*path == '\0') {
720 /* skip empty line. */
721 continue;
722 }
723 /*
724 * Lines which start with ". " are considered to be comments.
725 */
726 if (*path == '.' && *(path + 1) == ' ')
727 continue;
728 /*
729 * Skip the following:
730 * o directory
731 * o file which does not exist
732 * o dead symbolic link
733 */
734 if (!test("f", path)) {
735 if (test("d", path))
736 warning("'%s' is a directory. ignored.", trimpath(path));
737 else
738 warning("'%s' not found. ignored.", trimpath(path));
739 continue;
740 }
741 /*
742 * normalize path name.
743 *
744 * rootdir /a/b/
745 * buf /a/b/c/d.c -> c/d.c -> ./c/d.c
746 */
747 if (normalize(path, rootdir, cwddir, buf, sizeof(buf)) == NULL) {
748 warning("'%s' is out of source tree. ignored.", trimpath(path));
749 continue;
750 }
751 path = buf;
752 /*
753 * Now GLOBAL can treat the path which includes blanks.
754 * This message is obsoleted.
755 */
756 if (!allow_blank && locatestring(path, " ", MATCH_LAST)) {
757 warning("'%s' ignored, because it includes blank.", trimpath(path));
758 continue;
759 }
760 if (skipthisfile(path))
761 continue;
762 /*
763 * A blank at the head of path means
764 * other than source file.
765 */
766 if (!issourcefile(path))
767 *--path = ' ';
768 return path;
769 }
770 }
771 /**
772 * find_close: close iterator.
773 */
774 void
775 find_close(void)
776 {
777 assert(find_mode != 0);
778 if (find_mode == FIND_OPEN) {
779 if (stack)
780 varray_close(stack);
781 } else if (find_mode == FILELIST_OPEN) {
782 /*
783 * The --file=- option is specified, we don't close file
784 * to read it repeatedly.
785 */
786 if (ip != temp)
787 fclose(ip);
788 } else {
789 die("find_close: internal error.");
790 }
791 if (suff)
792 regfree(suff);
793 if (skip)
794 regfree(skip);
795 find_eof = find_mode = 0;
796 }
/* */