From e7583a1a31441f13c6938ef98917dba74359093c Mon Sep 17 00:00:00 2001 From: Martin Langhoff Date: Thu, 04 Nov 2010 19:58:09 +0000 Subject: FreeBSD locate code from FreeBSD 8 Taken from SVN. svn log says r196045 | kensmith | 2009-08-03 04:13:06 -0400 (Mon, 03 Aug 2009) | 4 lines Copy head to stable/8 as part of 8.0 Release cycle. with some massaging of the Makefiles to get bigram and code binaries --- diff --git a/locate.freebsd/Makefile b/locate.freebsd/Makefile new file mode 100644 index 0000000..1d747c2 --- /dev/null +++ b/locate.freebsd/Makefile @@ -0,0 +1,8 @@ +# @(#)Makefile 8.1 (Berkeley) 6/6/93 +# $FreeBSD$ + + +SUBDIR= bigram code locate +CFLAGS+= -I./locate/ + +all: bigram/locate.bigram code/locate.code diff --git a/locate.freebsd/bigram/locate.bigram.c b/locate.freebsd/bigram/locate.bigram.c new file mode 100644 index 0000000..208824f --- /dev/null +++ b/locate.freebsd/bigram/locate.bigram.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; +#endif /* not lint */ + +/* + * bigram < sorted_file_names | sort -nr | + * awk 'NR <= 128 { printf $2 }' > bigrams + * + * List bigrams (with their occurrence counts) for the 'updatedb' script. + * Use 'code' to encode a file using this output. + */ + +#include +#include +#include /* for MAXPATHLEN */ +#include "locate.h" + +u_char buf1[MAXPATHLEN] = " "; +u_char buf2[MAXPATHLEN]; +u_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1]; + +int +main(void) /* read path names from stdin, print one count-and-bigram line to stdout per counted bigram */ +{ + register u_char *cp; + register u_char *oldpath = buf1, *path = buf2; + register u_int i, j; + + while (fgets(path, sizeof(buf2), stdin) != NULL) { /* NOTE(review): fgets() takes char *, path is u_char * -- confirm the compilers in use accept this conversion */ + + /* + * We don't need to remove the newline character '\n'. + * '\n' is less than ASCII_MIN and will be ignored + * later at output.
+ */ + + + /* skip the longest prefix shared with the previous line */ + for (cp = path; *cp == *oldpath; cp++, oldpath++) + if (*cp == '\0') + break; + + while (*cp != '\0' && *(cp + 1) != '\0') { /* count the rest of the line two characters at a time (non-overlapping pairs) */ + bigram[(u_char)*cp][(u_char)*(cp + 1)]++; + cp += 2; + } + + /* swap pointers: the line just read becomes the previous line */ + if (path == buf1) { + path = buf2; + oldpath = buf1; + } else { + path = buf1; + oldpath = buf2; + } + } + + /* output: only bigrams seen at least once whose two characters both lie in ASCII_MIN..ASCII_MAX */ + for (i = ASCII_MIN; i <= ASCII_MAX; i++) + for (j = ASCII_MIN; j <= ASCII_MAX; j++) + if (bigram[i][j] != 0) + (void)printf("%4u %c%c\n", bigram[i][j], i, j); + + exit(0); +} diff --git a/locate.freebsd/code/locate.code.c b/locate.freebsd/code/locate.code.c new file mode 100644 index 0000000..6290c67 --- /dev/null +++ b/locate.freebsd/code/locate.code.c @@ -0,0 +1,279 @@ +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93"; +#endif /* not lint */ + +/* + * PURPOSE: sorted list compressor (works with a modified 'find' + * to encode/decode a filename database) + * + * USAGE: bigram < list > bigrams + * process bigrams (see updatedb) > common_bigrams + * code common_bigrams < list > squozen_list + * + * METHOD: Uses 'front compression' (see ";login:", Volume 8, Number 1 + * February/March 1983, p. 8). Output format is, per line, an + * offset differential count byte followed by a partially bigram- + * encoded ascii residue. A bigram is a two-character sequence, + * the first 128 most common of which are encoded in one byte. + * + * EXAMPLE: For simple front compression with no bigram encoding, + * if the input is... then the output is...
+ * + * /usr/src 0 /usr/src + * /usr/src/cmd/aardvark.c 8 /cmd/aardvark.c + * /usr/src/cmd/armadillo.c 14 armadillo.c + * /usr/tmp/zoo 5 tmp/zoo + * + * The codes are: + * + * 0-28 likeliest differential counts + offset to make nonnegative + * 30 switch code for out-of-range count to follow in next word + * 31 an 8 bit char followed + * 128-255 bigram codes (128 most common, as determined by 'updatedb') + * 32-127 single character (printable) ascii residue (ie, literal) + * + * The locate database stores any character except newline ('\n') + * and NUL ('\0'). The 8-bit character support doesn't waste extra + * space until you have characters in file names less than 32 + * or greater than 127. + * + * + * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c + * + * AUTHOR: James A. Woods, Informatics General Corp., + * NASA Ames Research Center, 10/82 + * 8-bit file name characters: + * Wolfram Schneider, Berlin September 1996 + */ + +#include +#include +#include +#include +#include +#include +#include +#include "locate.h" + +#define BGBUFSIZE (NBG * 2) /* size of bigram buffer */ + +u_char buf1[MAXPATHLEN] = " "; +u_char buf2[MAXPATHLEN]; +u_char bigrams[BGBUFSIZE + 1] = { 0 }; + +#define LOOKUP 1 /* use a lookup array instead of a function, 3x faster */ + +#ifdef LOOKUP +#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)]) +typedef short bg_t; +bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1]; +#else +#define BGINDEX(x) bgindex(x) +typedef int bg_t; +int bgindex(char *); +#endif /* LOOKUP */ + + +void usage(void); + +int +main(argc, argv) /* argv[0] after option parsing names the common-bigram file; the path list is read from stdin and the encoded database is written to stdout */ + int argc; + char *argv[]; +{ + register u_char *cp, *oldpath, *path; + int ch, code, count, diffcount, oldcount; + FILE *fp; + register int i, j; + + while ((ch = getopt(argc, argv, "")) != -1) + switch(ch) { + default: + usage(); + } + argc -= optind; + argv += optind; + + if (argc != 1) + usage(); + + if ((fp = fopen(argv[0], "r")) == NULL) + err(1, "%s", argv[0]); + + /* First copy bigram array to stdout.
NOTE(review): the fgets() result is not checked; bigrams[] is zero-initialized, so a short or unreadable file still produces BGBUFSIZE bytes of output */ + (void)fgets(bigrams, BGBUFSIZE + 1, fp); + + if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE) + err(1, "stdout"); + (void)fclose(fp); + +#ifdef LOOKUP + /* init lookup table: every pair starts out as (bg_t)-1, meaning not a common bigram */ + for (i = 0; i < UCHAR_MAX + 1; i++) + for (j = 0; j < UCHAR_MAX + 1; j++) + big[i][j] = (bg_t)-1; + + for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2) + big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i; /* value is the byte offset of the pair within bigrams[] */ + +#endif /* LOOKUP */ + + oldpath = buf1; + path = buf2; + oldcount = 0; + + while (fgets(path, sizeof(buf2), stdin) != NULL) { + + /* skip empty lines */ + if (*path == '\n') + continue; + + /* sanitize the line: replace SWITCH bytes (unless LOCATE_CHAR30 is defined) and remove the newline */ + for (cp = path; *cp != '\0'; cp++) { +#ifndef LOCATE_CHAR30 + /* old locate implementations core'd for char 30 */ + if (*cp == SWITCH) + *cp = '?'; + else +#endif /* !LOCATE_CHAR30 */ + + /* chop newline */ + if (*cp == '\n') + *cp = '\0'; + } + + /* Skip the longest prefix shared with the previous path. */ + for (cp = path; *cp == *oldpath; cp++, oldpath++) + if (*cp == '\0') + break; + + count = cp - path; /* length of the shared prefix */ + diffcount = count - oldcount + OFFSET; /* change in prefix length, biased by OFFSET */ + oldcount = count; + if (diffcount < 0 || diffcount > 2 * OFFSET) { /* outside the one-byte range: emit SWITCH, then the count as a word via putw() */ + if (putchar(SWITCH) == EOF || + putw(diffcount, stdout) == EOF) + err(1, "stdout"); + } else + if (putchar(diffcount) == EOF) + err(1, "stdout"); + + while (*cp != '\0') { + /* print *two* characters */ + + if ((code = BGINDEX(cp)) != (bg_t)-1) { + /* + * print *one* as bigram + * Found, so mark byte with + * parity bit.
+ */ + if (putchar((code / 2) | PARITY) == EOF) /* code is a byte offset into bigrams[], so code / 2 is the bigram index */ + err(1, "stdout"); + cp += 2; + } + + else { + for (i = 0; i < 2; i++) { + if (*cp == '\0') + break; + + /* bytes outside ASCII_MIN..ASCII_MAX (umlauts in file names) are written as UMLAUT followed by the raw byte */ + if (*cp < ASCII_MIN || + *cp > ASCII_MAX) { + if (putchar(UMLAUT) == EOF || + putchar(*cp++) == EOF) + err(1, "stdout"); + } + + else { + /* normal character */ + if(putchar(*cp++) == EOF) + err(1, "stdout"); + } + } + + } + } + + if (path == buf1) { /* swap pointers */ + path = buf2; + oldpath = buf1; + } else { + path = buf1; + oldpath = buf2; + } + } + /* Non-zero status if there were errors */ + if (fflush(stdout) != 0 || ferror(stdout)) + exit(1); + exit(0); +} + +#ifndef LOOKUP +int +bgindex(bg) /* Return location of bg in bigrams or -1. Only compiled when LOOKUP is undefined. NOTE(review): the loop compares char values against NULL, a pointer constant; the NUL character is presumably intended */ + char *bg; +{ + register char bg0, bg1, *p; + + bg0 = bg[0]; + bg1 = bg[1]; + for (p = bigrams; *p != NULL; p++) + if (*p++ == bg0 && *p == bg1) + break; + return (*p == NULL ? -1 : (--p - bigrams)); +} +#endif /* !LOOKUP */ + +void +usage() /* print the usage line to stderr and exit with status 1 */ +{ + (void)fprintf(stderr, + "usage: locate.code common_bigrams < list > squozen_list\n"); + exit(1); +} diff --git a/locate.freebsd/locate/Makefile b/locate.freebsd/locate/Makefile new file mode 100644 index 0000000..9a618da --- /dev/null +++ b/locate.freebsd/locate/Makefile @@ -0,0 +1,21 @@ +# @(#)Makefile 8.1 (Berkeley) 6/6/93 +# $FreeBSD$ + +PROG= locate +SRCS= util.c locate.c +CFLAGS+= -I${.CURDIR} -DMMAP # -DDEBUG (print time) -O2 (10% faster) +SCRIPTS=updatedb.sh mklocatedb.sh concatdb.sh +MAN= locate.1 locate.updatedb.8 + +SCRIPTSDIR= ${LIBEXECDIR} +.for script in ${SCRIPTS} +SCRIPTSNAME_${script}= locate.${script:R} +.endfor +MLINKS+= locate.updatedb.8 updatedb.8 + +# only /usr/src/etc/Makefile install files in /etc +# ${INSTALL} -o root -g wheel -m 644 \ +# ${.CURDIR}/locate.rc ${DESTDIR}/etc + +.include "../../Makefile.inc" +.include diff --git a/locate.freebsd/locate/concatdb.sh b/locate.freebsd/locate/concatdb.sh new file mode 100644 index 0000000..156f21a --- /dev/null +++
b/locate.freebsd/locate/concatdb.sh @@ -0,0 +1,70 @@ +#!/bin/sh +# +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# concatdb - concatenate locate databases +# +# usage: concatdb database1 ... databaseN > newdb +# +# Sequence of databases is important.
+# +# $FreeBSD$ + +# The directory containing locate subprograms +: ${LIBEXECDIR:=/usr/libexec}; export LIBEXECDIR + +PATH=$LIBEXECDIR:/bin:/usr/bin:$PATH; export PATH + +umask 077 # protect temp files + +: ${TMPDIR:=/var/tmp}; export TMPDIR; +test -d "$TMPDIR" || TMPDIR=/var/tmp + +# utilities to build the locate database +: ${bigram:=locate.bigram} +: ${code:=locate.code} +: ${sort:=sort} +: ${locate:=locate} + + +case $# in + [01]) echo 'usage: concatdb databases1 ... databaseN > newdb' + exit 1 + ;; +esac + + +bigrams=`mktemp ${TMPDIR=/tmp}/_bigrams.XXXXXXXXXX` || exit 1 # temporary file that receives the 128 most frequent bigrams +trap 'rm -f $bigrams' 0 1 2 3 5 10 15 # remove the temporary file on exit and on the listed signals + +for db +do + $locate -d $db / # NOTE(review): $db is expanded unquoted, so a database name containing whitespace would be split +done | $bigram | $sort -nr | awk 'NR <= 128 { printf $2 }' > $bigrams # pass 1: list the paths matching / in every database and derive the common bigrams + +for db +do + $locate -d $db / +done | $code $bigrams # pass 2: list the same paths again and encode them to stdout diff --git a/locate.freebsd/locate/fastfind.c b/locate.freebsd/locate/fastfind.c new file mode 100644 index 0000000..21148bc --- /dev/null +++ b/locate.freebsd/locate/fastfind.c @@ -0,0 +1,329 @@ +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + + +#ifndef _LOCATE_STATISTIC_ +#define _LOCATE_STATISTIC_ + +void +statistic (fp, path_fcodes) /* read a whole database from fp and print size and compression statistics to stdout */ + FILE *fp; /* open database */ + char *path_fcodes; /* database name, printed in the report */ +{ + register int lines, chars, size, big, zwerg; + register u_char *p, *s; + register int c; + int count, umlaut; + u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; + + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { /* the database starts with NBG two-byte bigram entries */ + p[c] = check_bigram_char(getc(fp)); + s[c] = check_bigram_char(getc(fp)); + } + + lines = chars = big = zwerg = umlaut = 0; + size = NBG + NBG; + + for (c = getc(fp), count = 0; c != EOF; size++) { + if (c == SWITCH) { + count += getwf(fp) - OFFSET; + size += sizeof(int); + zwerg++; /* number of SWITCH-prefixed integer counts, reported as Integers */ + } else + count += c - OFFSET; + + for (p = path + count; (c = getc(fp)) > SWITCH; size++) /* p only measures the decoded length, nothing is stored through it; NOTE(review): count is not range-checked here, so p may point outside path[] */ + if (c < PARITY) { + if (c == UMLAUT) { + c = getc(fp); + size++; + umlaut++; + } + p++; + } else { + /* bigram char */ + big++; + p += 2; + } + + p++; + lines++; + chars += (p - path); + } + +
(void)printf("\nDatabase: %s\n", path_fcodes); + (void)printf("Compression: Front: %2.2f%%, ", + (size + big - (2 * NBG)) / (chars / (float)100)); + (void)printf("Bigram: %2.2f%%, ", (size - big) / (size / (float)100)); + (void)printf("Total: %2.2f%%\n", + (size - (2 * NBG)) / (chars / (float)100)); + (void)printf("Filenames: %d, ", lines); + (void)printf("Characters: %d, ", chars); + (void)printf("Database size: %d\n", size); + (void)printf("Bigram characters: %d, ", big); + (void)printf("Integers: %d, ", zwerg); + (void)printf("8-Bit characters: %d\n", umlaut); + +} +#endif /* _LOCATE_STATISTIC_ */ + +extern char separator; /* printed after every matching path */ + +void /* one body compiled under up to four names, selected by FF_MMAP and FF_ICASE: decodes the database entry by entry and reports every path that matches pathpart */ +#ifdef FF_MMAP + + +#ifdef FF_ICASE +fastfind_mmap_icase +#else +fastfind_mmap +#endif /* FF_ICASE */ +(pathpart, paddr, len, database) + char *pathpart; /* search string */ + caddr_t paddr; /* mmap pointer */ + int len; /* length of database */ + char *database; /* for error message */ + + +#else /* !FF_MMAP */ + + +#ifdef FF_ICASE +fastfind_icase +#else +fastfind +#endif /* FF_ICASE */ + +(fp, pathpart, database) + FILE *fp; /* open database */ + char *pathpart; /* search string */ + char *database; /* for error message */ + + +#endif /* FF_MMAP */ + +{ + register u_char *p, *s, *patend, *q, *foundchar; + register int c, cc; + int count, found, globflag; + u_char *cutoff; + u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; + +#ifdef FF_ICASE + /* use a lookup table for case insensitive search */ + u_char table[UCHAR_MAX + 1]; + + tolower_word(pathpart); +#endif /* FF_ICASE*/ + + /* init bigram table */ +#ifdef FF_MMAP + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { + p[c] = check_bigram_char(*paddr++); + s[c] = check_bigram_char(*paddr++); + } +#else + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { + p[c] = check_bigram_char(getc(fp)); + s[c] = check_bigram_char(getc(fp)); + } +#endif /* FF_MMAP */ + + /* scan the pattern for any character listed in LOCATE_REG (presumably the glob metacharacters, defined outside this file -- verify); the result sets globflag below */ + for (p = pathpart; *p != '\0'; p++) + if (index(LOCATE_REG, *p)
!= NULL) + break; + + if (*p == '\0') + globflag = 0; + else + globflag = 1; + + p = pathpart; + patend = patprep(p); /* find optimal (last) char for searching; patprep() is defined elsewhere, the match loop below compares backwards from patend until it reaches a NUL */ + cc = *patend; + +#ifdef FF_ICASE + /* mark both cases of the patend character in the lookup table */ + for (c = 0; c < UCHAR_MAX + 1; c++) + table[c] = 0; + + table[TOLOWER(*patend)] = 1; + table[toupper(*patend)] = 1; +#endif /* FF_ICASE */ + + + /* main loop: one iteration per database entry */ + found = count = 0; + foundchar = 0; + +#ifdef FF_MMAP + c = (u_char)*paddr++; len--; + for (; len > 0; ) { +#else + c = getc(fp); + for (; c != EOF; ) { +#endif /* FF_MMAP */ + + /* go forward or backward: update count, the number of leading characters kept from the previous path */ + if (c == SWITCH) { /* big step, an integer */ +#ifdef FF_MMAP + count += getwm(paddr) - OFFSET; + len -= INTSIZE; paddr += INTSIZE; +#else + count += getwf(fp) - OFFSET; +#endif /* FF_MMAP */ + } else { /* slow step, =< 14 chars */ + count += c - OFFSET; + } + + if (count < 0 || count > MAXPATHLEN) + errx(1, "corrupted database: %s", database); + /* overlay old path. NOTE(review): the decode loop below stores through p without checking it against path + MAXPATHLEN, and count == MAXPATHLEN passes the check above, so a corrupted database can write past the end of path[] */ + p = path + count; + foundchar = p - 1; + +#ifdef FF_MMAP + for (; len > 0;) { + c = (u_char)*paddr++; + len--; +#else + for (;;) { + c = getc(fp); +#endif /* FF_MMAP */ + /* + * == UMLAUT: 8 bit char followed + * <= SWITCH: offset + * >= PARITY: bigram + * rest: single ascii char + * + * offset < SWITCH < UMLAUT < ascii < PARITY < bigram + */ + if (c < PARITY) { + if (c <= UMLAUT) { + if (c == UMLAUT) { +#ifdef FF_MMAP + c = (u_char)*paddr++; + len--; +#else + c = getc(fp); +#endif /* FF_MMAP */ + + } else + break; /* SWITCH */ + } +#ifdef FF_ICASE + if (table[c]) +#else + if (c == cc) +#endif /* FF_ICASE */ + foundchar = p; + *p++ = c; + } + else { + /* bigrams are parity-marked; TO7BIT is defined elsewhere and presumably clears the parity bit so that c indexes the bigram tables -- verify */ + TO7BIT(c); + +#ifndef FF_ICASE + if (bigram1[c] == cc || + bigram2[c] == cc) +#else + + if (table[bigram1[c]] || + table[bigram2[c]]) +#endif /* FF_ICASE */ + foundchar = p + 1; + + *p++ = bigram1[c]; + *p++ = bigram2[c]; + } + } + + if (found) { /* previous line matched: search the whole path again */ + cutoff = path; + *p-- = '\0'; + foundchar = p; + } else if (foundchar >= path + count) { /* a
char matched */ + *p-- = '\0'; + cutoff = path + count; + } else /* nothing to do */ + continue; + + found = 0; + for (s = foundchar; s >= cutoff; s--) { /* scan backwards from foundchar down to cutoff */ + if (*s == cc +#ifdef FF_ICASE + || TOLOWER(*s) == cc +#endif /* FF_ICASE */ + ) { /* fast first char check */ + for (p = patend - 1, q = s - 1; *p != '\0'; + p--, q--) + if (*q != *p +#ifdef FF_ICASE + && TOLOWER(*q) != *p +#endif /* FF_ICASE */ + ) + break; + if (*p == '\0') { /* fast match success */ + found = 1; + if (!globflag || +#ifndef FF_ICASE + !fnmatch(pathpart, path, 0)) +#else + !fnmatch(pathpart, path, + FNM_CASEFOLD)) +#endif /* !FF_ICASE */ + { + if (f_silent) /* count only, print nothing */ + counter++; + else if (f_limit) { /* print at most f_limit paths, then exit through errx(0, ...) */ + counter++; + if (f_limit >= counter) + (void)printf("%s%c",path,separator); + else + errx(0, "[show only %d lines]", counter - 1); + } else + (void)printf("%s%c",path,separator); + } + break; + } + } + } + } +} diff --git a/locate.freebsd/locate/locate.1 b/locate.freebsd/locate/locate.1 new file mode 100644 index 0000000..c436cd4 --- /dev/null +++ b/locate.freebsd/locate/locate.1 @@ -0,0 +1,276 @@ +.\" Copyright (c) 1995 Wolfram Schneider . Berlin. +.\" Copyright (c) 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4.
Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)locate.1 8.1 (Berkeley) 6/6/93 +.\" $FreeBSD$ +.\" +.Dd August 17, 2006 +.Dt LOCATE 1 +.Os +.Sh NAME +.Nm locate +.Nd find filenames quickly +.Sh SYNOPSIS +.Nm +.Op Fl 0Scims +.Op Fl l Ar limit +.Op Fl d Ar database +.Ar pattern ... +.Sh DESCRIPTION +The +.Nm +program searches a database for all pathnames which match the specified +.Ar pattern . +The database is recomputed periodically (usually weekly or daily), +and contains the pathnames +of all files which are publicly accessible. +.Pp +Shell globbing and quoting characters +.Dq ( * , +.Dq \&? , +.Dq \e , +.Dq \&[ +and +.Dq \&] ) +may be used in +.Ar pattern , +although they will have to be escaped from the shell. +Preceding any character with a backslash +.Pq Dq \e +eliminates any special +meaning which it may have. +The matching differs in that no characters must be matched explicitly, +including slashes +.Pq Dq / . 
+.Pp +As a special case, a pattern containing no globbing characters +.Pq Dq foo +is matched as though it were +.Dq *foo* . +.Pp +Historically, locate only stored characters between 32 and 127. +The +current implementation stores any character except newline +.Pq Sq \en +and +.Dv NUL +.Pq Sq \e0 . +The 8-bit character support does not waste extra space for +plain ASCII file names. +Characters less than 32 or greater than 127 +are stored in 2 bytes. +.Pp +The following options are available: +.Bl -tag -width 10n +.It Fl 0 +Print pathnames separated by an +.Tn ASCII +.Dv NUL +character (character code 0) instead of the default NL +(newline, character code 10). +.It Fl S +Print some statistics about the database and exit. +.It Fl c +Suppress normal output; instead print a count of matching file names. +.It Fl d Ar database +Search in +.Ar database +instead of the default file name database. +Multiple +.Fl d +options are allowed. +Each additional +.Fl d +option adds the specified database to the list +of databases to be searched. +.Pp +The option +.Ar database +may be a colon-separated list of databases. +A single colon is a reference +to the default database. +.Bd -literal +$ locate -d $HOME/lib/mydb: foo +.Ed +.Pp +will first search for the string +.Dq foo +in +.Pa $HOME/lib/mydb +and then in +.Pa /var/db/locate.database . +.Bd -literal +$ locate -d $HOME/lib/mydb::/cdrom/locate.database foo +.Ed +.Pp +will first search for the string +.Dq foo +in +.Pa $HOME/lib/mydb +and then in +.Pa /var/db/locate.database +and then in +.Pa /cdrom/locate.database . +.Pp +.Dl "$ locate -d db1 -d db2 -d db3 pattern" +.Pp +is the same as +.Pp +.Dl "$ locate -d db1:db2:db3 pattern" +.Pp +or +.Pp +.Dl "$ locate -d db1:db2 -d db3 pattern" +.Pp +If +.Fl +is given as the database name, standard input will be read instead.
+For example, you can compress your database +and use: +.Bd -literal +$ zcat database.gz | locate -d - pattern +.Ed +.Pp +This might be useful on machines with a fast CPU and little RAM and slow +I/O. +Note: you can only use +.Em one +pattern for stdin. +.It Fl i +Ignore case distinctions in both the pattern and the database. +.It Fl l Ar number +Limit output to +.Ar number +of file names and exit. +.It Fl m +Use +.Xr mmap 2 +instead of the +.Xr stdio 3 +library. +This is the default behavior +and is faster in most cases. +.It Fl s +Use the +.Xr stdio 3 +library instead of +.Xr mmap 2 . +.El +.Sh ENVIRONMENT +.Bl -tag -width LOCATE_PATH -compact +.It Pa LOCATE_PATH +path to the locate database if set and not empty, ignored if the +.Fl d +option was specified. +.El +.Sh FILES +.Bl -tag -width /etc/periodic/weekly/310.locate -compact +.It Pa /var/db/locate.database +locate database +.It Pa /usr/libexec/locate.updatedb +Script to update the locate database +.It Pa /etc/periodic/weekly/310.locate +Script that starts the database rebuild +.El +.Sh SEE ALSO +.Xr find 1 , +.Xr whereis 1 , +.Xr which 1 , +.Xr fnmatch 3 , +.Xr locate.updatedb 8 +.Rs +.%A Woods, James A. +.%D 1983 +.%T "Finding Files Fast" +.%J ";login" +.%V 8:1 +.%P pp. 8-10 +.Re +.Sh HISTORY +The +.Nm +command first appeared in +.Bx 4.4 . +Many new features were +added in +.Fx 2.2 . +.Sh BUGS +The +.Nm +program may fail to list some files that are present, or may +list files that have been removed from the system. +This is because +locate only reports files that are present in the database, which is +typically only regenerated once a week by the +.Pa /etc/periodic/weekly/310.locate +script. +Use +.Xr find 1 +to locate files that are of a more transitory nature. +.Pp +The +.Nm +database is typically built by user +.Dq nobody +and the +.Xr locate.updatedb 8 +utility skips directories +which are not readable for user +.Dq nobody , +group +.Dq nobody , +or +world. 
+For example, if your HOME directory is not world-readable, +.Em none +of your files are +in the database. +.Pp +The +.Nm +database is not byte order independent. +It is not possible +to share the databases between machines with different byte order. +The current +.Nm +implementation understands databases in host byte order or +network byte order if both architectures use the same integer size. +So on a +.Fx Ns /i386 +machine +(little endian), you can read +a locate database which was built on SunOS/sparc machine +(big endian, net). +.Pp +The +.Nm +utility does not recognize multibyte characters. diff --git a/locate.freebsd/locate/locate.c b/locate.freebsd/locate/locate.c new file mode 100644 index 0000000..67a97da --- /dev/null +++ b/locate.freebsd/locate/locate.c @@ -0,0 +1,366 @@ +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static const char copyright[] = +"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ +@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; +#endif +static const char rcsid[] = + "$FreeBSD$"; +#endif /* not lint */ + +/* + * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. + * + * Locate scans a file list for the full pathname of a file given only part + * of the name. The list has been processed with "front-compression" + * and bigram coding. Front compression reduces space by a factor of 4-5, + * bigram coding by a further 20-25%.
+ * + * The codes are: + * + * 0-28 likeliest differential counts + offset to make nonnegative + * 30 switch code for out-of-range count to follow in next word + * 31 an 8 bit char followed + * 128-255 bigram codes (128 most common, as determined by 'updatedb') + * 32-127 single character (printable) ascii residue (ie, literal) + * + * A novel two-tiered string search technique is employed: + * + * First, a metacharacter-free subpattern and partial pathname is matched + * BACKWARDS to avoid full expansion of the pathname list. The time savings + * is 40-50% over forward matching, which cannot efficiently handle + * overlapped search patterns and compressed path residue. + * + * Then, the actual shell glob-style regular expression (if in this form) is + * matched against the candidate pathnames using the slower routines provided + * in the standard 'find'. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef MMAP +# include +# include +# include +# include +#endif + + +#include "locate.h" +#include "pathnames.h" + +#ifdef DEBUG +# include +# include +# include +#endif + +int f_mmap; /* use mmap */ +int f_icase; /* ignore case */ +int f_stdin; /* read database from stdin */ +int f_statistic; /* print statistic */ +int f_silent; /* suppress output, show only count of matches */ +int f_limit; /* limit number of output lines, 0 == infinite */ +u_int counter; /* counter for matches [-c] */ +char separator='\n'; /* line separator */ + + +void usage(void); +void statistic(FILE *, char *); +void fastfind(FILE *, char *, char *); +void fastfind_icase(FILE *, char *, char *); +void fastfind_mmap(char *, caddr_t, int, char *); +void fastfind_mmap_icase(char *, caddr_t, int, char *); +void search_mmap(char *, char **); +void search_fopen(char *, char **); +unsigned long cputime(void); + +extern char **colon(char **, char*, char*); +extern void print_matches(u_int); +extern int getwm(caddr_t); +extern int getwf(FILE *); 
+extern u_char *tolower_word(u_char *); +extern int check_bigram_char(int); +extern char *patprep(char *); + +int +main(argc, argv) + int argc; + char **argv; +{ + register int ch; + char **dbv = NULL; + char *path_fcodes; /* locate database */ +#ifdef MMAP + f_mmap = 1; /* mmap is default */ +#endif + (void) setlocale(LC_ALL, ""); + + while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) + switch(ch) { + case '0': /* 'find -print0' style */ + separator = '\0'; + break; + case 'S': /* statistic lines */ + f_statistic = 1; + break; + case 'l': /* limit number of output lines, 0 == infinite */ + f_limit = atoi(optarg); + break; + case 'd': /* database */ + dbv = colon(dbv, optarg, _PATH_FCODES); + break; + case 'i': /* ignore case */ + f_icase = 1; + break; + case 'm': /* mmap */ +#ifdef MMAP + f_mmap = 1; +#else + warnx("mmap(2) not implemented"); +#endif + break; + case 's': /* stdio lib */ + f_mmap = 0; + break; + case 'c': /* suppress output, show only count of matches */ + f_silent = 1; + break; + default: + usage(); + } + argv += optind; + argc -= optind; + + /* too few arguments */ + if (argc < 1 && !(f_statistic)) + usage(); + + /* no (valid) database as argument */ + if (dbv == NULL || *dbv == NULL) { + /* try to read database from environment */ + if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || + *path_fcodes == '\0') + /* use default database */ + dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); + else /* $LOCATE_PATH */ + dbv = colon(dbv, path_fcodes, _PATH_FCODES); + } + + if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ + for (ch = 0; ch < UCHAR_MAX + 1; ch++) + myctype[ch] = tolower(ch); + + /* foreach database ...
*/ + while((path_fcodes = *dbv) != NULL) { + dbv++; + + if (!strcmp(path_fcodes, "-")) + f_stdin = 1; + else + f_stdin = 0; + +#ifndef MMAP + f_mmap = 0; /* be paranoid */ +#endif + if (!f_mmap || f_stdin || f_statistic) + search_fopen(path_fcodes, argv); + else + search_mmap(path_fcodes, argv); + } + + if (f_silent) + print_matches(counter); + exit(0); +} + + +void +search_fopen(db, s) + char *db; /* database */ + char **s; /* search strings */ +{ + FILE *fp; +#ifdef DEBUG + long t0; +#endif + + /* can only read stdin once */ + if (f_stdin) { + fp = stdin; + if (*(s+1) != NULL) { + warnx("read database from stdin, use only `%s' as pattern", *s); + *(s+1) = NULL; + } + } + else if ((fp = fopen(db, "r")) == NULL) + err(1, "`%s'", db); + + /* count only chars or lines */ + if (f_statistic) { + statistic(fp, db); + (void)fclose(fp); + return; + } + + /* foreach search string ... */ + while(*s != NULL) { +#ifdef DEBUG + t0 = cputime(); +#endif + if (!f_stdin && + fseek(fp, (long)0, SEEK_SET) == -1) + err(1, "fseek to begin of ``%s''\n", db); + + if (f_icase) + fastfind_icase(fp, *s, db); + else + fastfind(fp, *s, db); +#ifdef DEBUG + warnx("fastfind %ld ms", cputime () - t0); +#endif + s++; + } + (void)fclose(fp); +} + +#ifdef MMAP +void +search_mmap(db, s) + char *db; /* database */ + char **s; /* search strings */ +{ + struct stat sb; + int fd; + caddr_t p; + off_t len; +#ifdef DEBUG + long t0; +#endif + if ((fd = open(db, O_RDONLY)) == -1 || + fstat(fd, &sb) == -1) + err(1, "`%s'", db); + len = sb.st_size; + if (len < (2*NBG)) + errx(1, "database too small: %s", db); + + if ((p = mmap((caddr_t)0, (size_t)len, + PROT_READ, MAP_SHARED, + fd, (off_t)0)) == MAP_FAILED) + err(1, "mmap ``%s''", db); + + /* foreach search string ... 
*/ + while (*s != NULL) { +#ifdef DEBUG + t0 = cputime(); +#endif + if (f_icase) + fastfind_mmap_icase(*s, p, (int)len, db); + else + fastfind_mmap(*s, p, (int)len, db); +#ifdef DEBUG + warnx("fastfind %ld ms", cputime () - t0); +#endif + s++; + } + + if (munmap(p, (size_t)len) == -1) + warn("munmap %s\n", db); + + (void)close(fd); +} +#endif /* MMAP */ + +#ifdef DEBUG +unsigned long +cputime () +{ + struct rusage rus; + + getrusage(RUSAGE_SELF, &rus); + return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000); +} +#endif /* DEBUG */ + +void +usage () +{ + (void)fprintf(stderr, + "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); + (void)fprintf(stderr, + "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); + exit(1); +} + + +/* load fastfind functions */ + +/* statistic */ +/* fastfind_mmap, fastfind_mmap_icase */ +#ifdef MMAP +#undef FF_MMAP +#undef FF_ICASE + +#define FF_MMAP +#include "fastfind.c" +#define FF_ICASE +#include "fastfind.c" +#endif /* MMAP */ + +/* fopen */ +/* fastfind, fastfind_icase */ +#undef FF_MMAP +#undef FF_ICASE +#include "fastfind.c" +#define FF_ICASE +#include "fastfind.c" diff --git a/locate.freebsd/locate/locate.h b/locate.freebsd/locate/locate.h new file mode 100644 index 0000000..24df8d4 --- /dev/null +++ b/locate.freebsd/locate/locate.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)locate.h 8.1 (Berkeley) 6/6/93 + * $FreeBSD$ + */ + +/* Symbolic constants shared by locate.c and code.c */ + +#define NBG 128 /* number of bigrams considered */ +#define OFFSET 14 /* abs value of max likely diff */ +#define PARITY 0200 /* parity bit */ +#define SWITCH 30 /* switch code */ +#define UMLAUT 31 /* an 8 bit char followed */ + +/* 0-28 likeliest differential counts + offset to make nonnegative */ +#define LDC_MIN 0 +#define LDC_MAX 28 + +/* 128-255 bigram codes (128 most common, as determined by 'updatedb') */ +#define BIGRAM_MIN (UCHAR_MAX - CHAR_MAX) +#define BIGRAM_MAX UCHAR_MAX + +/* 32-127 single character (printable) ascii residue (ie, literal) */ +#define ASCII_MIN 32 +#define ASCII_MAX CHAR_MAX + +/* #define TO7BIT(x) (x = ( ((u_char)x) & CHAR_MAX )) */ +#define TO7BIT(x) (x = x & CHAR_MAX ) + + +#if UCHAR_MAX >= 4096 + define TOLOWER(ch) tolower(ch) +#else + +u_char myctype[UCHAR_MAX + 1]; +#define TOLOWER(ch) (myctype[ch]) +#endif + +#define INTSIZE (sizeof(int)) + +#define LOCATE_REG "*?[]\\" /* fnmatch(3) meta characters */ diff --git a/locate.freebsd/locate/locate.updatedb.8 b/locate.freebsd/locate/locate.updatedb.8 new file mode 100644 index 0000000..2e2248f --- /dev/null +++ b/locate.freebsd/locate/locate.updatedb.8 @@ -0,0 +1,75 @@ +.\" Copyright (c) 1996 +.\" Mike Pritchard . All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. 
All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by Mike Pritchard. +.\" 4. Neither the name of the author nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd February 11, 1996 +.Dt LOCATE.UPDATEDB 8 +.Os +.Sh NAME +.Nm locate.updatedb +.Nd update locate database +.Sh SYNOPSIS +.Nm /usr/libexec/locate.updatedb +.Sh DESCRIPTION +The +.Nm +utility updates the database used by +.Xr locate 1 . +It is typically run once a week by the +.Pa /etc/periodic/weekly/310.locate +script. +.Pp +The contents of the newly built database can be controlled by the +.Pa /etc/locate.rc +file. +.Sh ENVIRONMENT +.Bl -tag -width /var/db/locate.database -compact +.It Pa LOCATE_CONFIG +path to the configuration file +.El +.Sh FILES +.Bl -tag -width /var/db/locate.database -compact +.It Pa /var/db/locate.database +the default database +.It Pa /etc/locate.rc +the configuration file +.El +.Sh SEE ALSO +.Xr locate 1 , +.Xr periodic 8 +.Rs +.%A Woods, James A. 
+.%D 1983 +.%T "Finding Files Fast" +.%J ";login" +.%V 8:1 +.%P pp. 8-10 +.Re diff --git a/locate.freebsd/locate/mklocatedb.sh b/locate.freebsd/locate/mklocatedb.sh new file mode 100644 index 0000000..39d15d7 --- /dev/null +++ b/locate.freebsd/locate/mklocatedb.sh @@ -0,0 +1,92 @@ +#!/bin/sh +# +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+# +# mklocatedb - build locate database +# +# usage: mklocatedb [-presort] < filelist > database +# +# $FreeBSD$ + +# The directory containing locate subprograms +: ${LIBEXECDIR:=/usr/libexec}; export LIBEXECDIR + +PATH=$LIBEXECDIR:/bin:/usr/bin:$PATH; export PATH + +umask 077 # protect temp files + +: ${TMPDIR:=/tmp}; export TMPDIR +test -d "$TMPDIR" || TMPDIR=/tmp +if ! TMPDIR=`mktemp -d $TMPDIR/mklocateXXXXXXXXXX`; then + exit 1 +fi + + +# utilities to build locate database +: ${bigram:=locate.bigram} +: ${code:=locate.code} +: ${sort:=sort} + + +sortopt="-u -T $TMPDIR" +sortcmd=$sort + + +bigrams=$TMPDIR/_mklocatedb$$.bigrams +filelist=$TMPDIR/_mklocatedb$$.list + +trap 'rm -f $bigrams $filelist; rmdir $TMPDIR' 0 1 2 3 5 10 15 + + +# Input already sorted +if [ X"$1" = "X-presort" ]; then + shift; + + # create an empty file + true > $bigrams + + # Locate database bootstrapping + # 1. first build a temp database without bigram compression + # 2. create the bigram from the temp database + # 3. create the real locate database with bigram compression. + # + # This scheme avoids large temporary files in /tmp + + $code $bigrams > $filelist || exit 1 + locate -d $filelist / | $bigram | $sort -nr | head -128 | + awk '{if (/^[ ]*[0-9]+[ ]+..$/) {printf("%s",$2)} else {exit 1}}' > $bigrams || exit 1 + locate -d $filelist / | $code $bigrams || exit 1 + exit + +else + if $sortcmd $sortopt > $filelist; then + $bigram < $filelist | $sort -nr | + awk '{if (/^[ ]*[0-9]+[ ]+..$/) {printf("%s",$2)} else {exit 1}}' > $bigrams || exit 1 + $code $bigrams < $filelist || exit 1 + else + echo "`basename $0`: cannot build locate database" >&2 + exit 1 + fi +fi diff --git a/locate.freebsd/locate/pathnames.h b/locate.freebsd/locate/pathnames.h new file mode 100644 index 0000000..8fb0e8c --- /dev/null +++ b/locate.freebsd/locate/pathnames.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)pathnames.h 8.1 (Berkeley) 6/6/93 + */ + +#define _PATH_FCODES "/var/db/locate.database" diff --git a/locate.freebsd/locate/updatedb.sh b/locate.freebsd/locate/updatedb.sh new file mode 100644 index 0000000..d828438 --- /dev/null +++ b/locate.freebsd/locate/updatedb.sh @@ -0,0 +1,95 @@ +#!/bin/sh +# +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# updatedb - update locate database for local mounted filesystems +# +# $FreeBSD$ + +if [ "$(id -u)" = "0" ]; then + echo ">>> WARNING" 1>&2 + echo ">>> Executing updatedb as root. This WILL reveal all filenames" 1>&2 + echo ">>> on your machine to all login users, which is a security risk." 
1>&2 +fi +: ${LOCATE_CONFIG="/etc/locate.rc"} +if [ -f "$LOCATE_CONFIG" -a -r "$LOCATE_CONFIG" ]; then + . $LOCATE_CONFIG +fi + +# The directory containing locate subprograms +: ${LIBEXECDIR:=/usr/libexec}; export LIBEXECDIR +: ${TMPDIR:=/tmp}; export TMPDIR +if ! TMPDIR=`mktemp -d $TMPDIR/locateXXXXXXXXXX`; then + exit 1 +fi + +PATH=$LIBEXECDIR:/bin:/usr/bin:$PATH; export PATH + + +: ${mklocatedb:=locate.mklocatedb} # make locate database program +: ${FCODES:=/var/db/locate.database} # the database +: ${SEARCHPATHS:="/"} # directories to be put in the database +: ${PRUNEPATHS:="/tmp /usr/tmp /var/tmp /var/db/portsnap"} # unwanted directories +: ${FILESYSTEMS:="$(lsvfs | tail -n +3 | \ + egrep -vw "loopback|network|synthetic|read-only|0" | \ + cut -d " " -f1)"} # allowed filesystems +: ${find:=find} + +case X"$SEARCHPATHS" in + X) echo "$0: empty variable SEARCHPATHS"; exit 1;; esac +case X"$FILESYSTEMS" in + X) echo "$0: empty variable FILESYSTEMS"; exit 1;; esac + +# Make a list a paths to exclude in the locate run +excludes="! (" or="" +for fstype in $FILESYSTEMS +do + excludes="$excludes $or -fstype $fstype" + or="-or" +done +excludes="$excludes ) -prune" + +case X"$PRUNEPATHS" in + X) ;; + *) for path in $PRUNEPATHS + do + excludes="$excludes -or -path $path -prune" + done;; +esac + +tmp=$TMPDIR/_updatedb$$ +trap 'rm -f $tmp; rmdir $TMPDIR' 0 1 2 3 5 10 15 + +# search locally +# echo $find $SEARCHPATHS $excludes -or -print && exit +if $find -s $SEARCHPATHS $excludes -or -print 2>/dev/null | + $mklocatedb -presort > $tmp +then + case X"`$find $tmp -size -257c -print`" in + X) cat $tmp > $FCODES;; + *) echo "updatedb: locate database $tmp is empty" + exit 1 + esac +fi diff --git a/locate.freebsd/locate/util.c b/locate.freebsd/locate/util.c new file mode 100644 index 0000000..9cd02b0 --- /dev/null +++ b/locate.freebsd/locate/util.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. 
+ * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + + +#include +#include +#include +#include +#include +#include + +#include "locate.h" + +char **colon(char **, char*, char*); +char *patprep(char *); +void print_matches(u_int); +u_char *tolower_word(u_char *); +int getwm(caddr_t); +int getwf(FILE *); +int check_bigram_char(int); + +/* + * Validate bigram chars. If the test failed the database is corrupt + * or the database is obviously not a locate database. + */ +int +check_bigram_char(ch) + int ch; +{ + /* legal bigram: 0, ASCII_MIN ... ASCII_MAX */ + if (ch == 0 || + (ch >= ASCII_MIN && ch <= ASCII_MAX)) + return(ch); + + errx(1, + "locate database header corrupt, bigram char outside 0, %d-%d: %d", + ASCII_MIN, ASCII_MAX, ch); + exit(1); +} + +/* split a colon separated string into a char vector + * + * "bla:foo" -> {"foo", "bla"} + * "bla:" -> {"foo", dot} + * "bla" -> {"bla"} + * "" -> do nothing + * + */ +char ** +colon(dbv, path, dot) + char **dbv; + char *path; + char *dot; /* default for single ':' */ +{ + int vlen, slen; + char *c, *ch, *p; + char **pv; + + if (dbv == NULL) { + if ((dbv = malloc(sizeof(char **))) == NULL) + err(1, "malloc"); + *dbv = NULL; + } + + /* empty string */ + if (*path == '\0') { + warnx("empty database name, ignored"); + return(dbv); + } + + /* length of string vector */ + for(vlen = 0, pv = dbv; *pv != NULL; pv++, vlen++); + + for (ch = c = path; ; ch++) { + if (*ch == ':' || + (!*ch && !(*(ch - 1) == ':' && ch == 1+ path))) { + /* single colon -> dot */ + if (ch == c) + p = dot; + else { + /* a string */ + slen = ch - c; + if ((p = malloc(sizeof(char) * (slen + 1))) + == NULL) + err(1, "malloc"); + bcopy(c, p, slen); + *(p + slen) = '\0'; + } + /* increase dbv with element p */ + if ((dbv = realloc(dbv, sizeof(char **) * (vlen + 2))) + == NULL) + err(1, "realloc"); + *(dbv + vlen) = p; + *(dbv + ++vlen) = NULL; + c = ch + 1; + } + if (*ch == '\0') + break; + } + return (dbv); +} + +void +print_matches(counter) + u_int counter; +{ + (void)printf("%d\n", 
+ counter); +} + + +/* + * extract last glob-free subpattern in name for fast pre-match; prepend + * '\0' for backwards match; return end of new pattern + */ +static char globfree[100]; + +char * +patprep(name) + char *name; +{ + register char *endmark, *p, *subp; + + subp = globfree; + *subp++ = '\0'; /* set first element to '\0' */ + p = name + strlen(name) - 1; + + /* skip trailing metacharacters */ + for (; p >= name; p--) + if (index(LOCATE_REG, *p) == NULL) + break; + + /* + * check if maybe we are in a character class + * + * 'foo.[ch]' + * |----< p + */ + if (p >= name && + (index(p, '[') != NULL || index(p, ']') != NULL)) { + for (p = name; *p != '\0'; p++) + if (*p == ']' || *p == '[') + break; + p--; + + /* + * cannot find a non-meta character, give up + * '*\*[a-z]' + * |-------< p + */ + if (p >= name && index(LOCATE_REG, *p) != NULL) + p = name - 1; + } + + if (p < name) + /* only meta chars: "???", force '/' search */ + *subp++ = '/'; + + else { + for (endmark = p; p >= name; p--) + if (index(LOCATE_REG, *p) != NULL) + break; + for (++p; + (p <= endmark) && subp < (globfree + sizeof(globfree));) + *subp++ = *p++; + } + *subp = '\0'; + return(--subp); +} + +/* tolower word */ +u_char * +tolower_word(word) + u_char *word; +{ + register u_char *p; + + for(p = word; *p != '\0'; p++) + *p = TOLOWER(*p); + + return(word); +} + + +/* + * Read integer from mmap pointer. + * Essentially a simple ``return *(int *)p'' but avoid sigbus + * for integer alignment (SunOS 4.x, 5.x). + * + * Convert network byte order to host byte order if necessary. + * So we can read on FreeBSD/i386 (little endian) a locate database + * which was built on SunOS/sparc (big endian).
+ */ + +int +getwm(p) + caddr_t p; +{ + union { + char buf[INTSIZE]; + int i; + } u; + register int i; + + for (i = 0; i < (int)INTSIZE; i++) + u.buf[i] = *p++; + + i = u.i; + + if (i > MAXPATHLEN || i < -(MAXPATHLEN)) { + i = ntohl(i); + if (i > MAXPATHLEN || i < -(MAXPATHLEN)) + errx(1, "integer out of +-MAXPATHLEN (%d): %u", + MAXPATHLEN, abs(i) < abs(htonl(i)) ? i : htonl(i)); + } + return(i); +} + +/* + * Read integer from stream. + * + * Convert network byte order to host byte order if necessary. + * So we can read on FreeBSD/i386 (little endian) a locate database + * which was built on SunOS/sparc (big endian). + */ + +int +getwf(fp) + FILE *fp; +{ + register int word; + + word = getw(fp); + + if (word > MAXPATHLEN || word < -(MAXPATHLEN)) { + word = ntohl(word); + if (word > MAXPATHLEN || word < -(MAXPATHLEN)) + errx(1, "integer out of +-MAXPATHLEN (%d): %u", + MAXPATHLEN, abs(word) < abs(htonl(word)) ? word : + htonl(word)); + } + return(word); +} -- cgit v0.9.1