mirror of
https://github.com/openbsd/src.git
synced 2024-12-22 07:27:59 -08:00
d9a51c353c
amendments to his diff are noted on tech
797 lines
17 KiB
C
797 lines
17 KiB
C
/* $OpenBSD: spellprog.c,v 1.16 2022/12/26 19:16:03 jmc Exp $ */
|
|
|
|
/*
|
|
* Copyright (c) 1991, 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* @(#)spell.h 8.1 (Berkeley) 6/6/93
|
|
*/
|
|
/*
|
|
* Copyright (C) Caldera International Inc. 2001-2002.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code and documentation must retain the above
|
|
* copyright notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed or owned by Caldera
|
|
* International, Inc.
|
|
* 4. Neither the name of Caldera International, Inc. nor the names of other
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
|
|
* INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
|
|
* INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
|
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/mman.h>
|
|
#include <sys/stat.h>
|
|
|
|
#include <ctype.h>
|
|
#include <err.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <limits.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#define DLEV 2
|
|
|
|
int an(char *, char *, char *, int);
|
|
int bility(char *, char *, char *, int);
|
|
int es(char *, char *, char *, int);
|
|
int dict(char *, char *);
|
|
int i_to_y(char *, char *, char *, int);
|
|
int ily(char *, char *, char *, int);
|
|
int ize(char *, char *, char *, int);
|
|
int metry(char *, char *, char *, int);
|
|
int monosyl(char *, char *);
|
|
int ncy(char *, char *, char *, int);
|
|
int nop(char *, char *, char *, int);
|
|
int trypref(char *, char *, int);
|
|
int tryword(char *, char *, int);
|
|
int s(char *, char *, char *, int);
|
|
int strip(char *, char *, char *, int);
|
|
int suffix(char *, int);
|
|
int tion(char *, char *, char *, int);
|
|
int vowel(unsigned char);
|
|
int y_to_e(char *, char *, char *, int);
|
|
int CCe(char *, char *, char *, int);
|
|
int VCe(char *, char *, char *, int);
|
|
char *lookuppref(char **, char *);
|
|
char *skipv(char *);
|
|
char *estrdup(const char *);
|
|
void ise(void);
|
|
void print_word(FILE *);
|
|
void ztos(char *);
|
|
static void __dead usage(void);
|
|
|
|
/* from look.c */
|
|
int look(unsigned char *, unsigned char *, unsigned char *);
|
|
|
|
struct suftab {
|
|
char *suf;
|
|
int (*p1)(char *, char *, char *, int);
|
|
int n1;
|
|
char *d1;
|
|
char *a1;
|
|
int (*p2)(char *, char *, char *, int);
|
|
int n2;
|
|
char *d2;
|
|
char *a2;
|
|
} suftab[] = {
|
|
{"ssen", ily, 4, "-y+iness", "+ness" },
|
|
{"ssel", ily, 4, "-y+i+less", "+less" },
|
|
{"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
|
|
{"s'", s, 2, "", "+'s"},
|
|
{"s", s, 1, "", "+s"},
|
|
{"ecn", ncy, 1, "", "-t+ce"},
|
|
{"ycn", ncy, 1, "", "-cy+t"},
|
|
{"ytilb", nop, 0, "", ""},
|
|
{"ytilib", bility, 5, "-le+ility", ""},
|
|
{"elbaif", i_to_y, 4, "-y+iable", ""},
|
|
{"elba", CCe, 4, "-e+able", "+able"},
|
|
{"yti", CCe, 3, "-e+ity", "+ity"},
|
|
{"ylb", y_to_e, 1, "-e+y", ""},
|
|
{"yl", ily, 2, "-y+ily", "+ly"},
|
|
{"laci", strip, 2, "", "+al"},
|
|
{"latnem", strip, 2, "", "+al"},
|
|
{"lanoi", strip, 2, "", "+al"},
|
|
{"tnem", strip, 4, "", "+ment"},
|
|
{"gni", CCe, 3, "-e+ing", "+ing"},
|
|
{"reta", nop, 0, "", ""},
|
|
{"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"},
|
|
{"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"},
|
|
{"citsi", strip, 2, "", "+ic"},
|
|
{"cihparg", i_to_y, 1, "-y+ic", ""},
|
|
{"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"},
|
|
{"cirtem", i_to_y, 1, "-y+ic", ""},
|
|
{"yrtem", metry, 0, "-ry+er", ""},
|
|
{"cigol", i_to_y, 1, "-y+ic", ""},
|
|
{"tsigol", i_to_y, 2, "-y+ist", ""},
|
|
{"tsi", VCe, 3, "-e+ist", "+ist"},
|
|
{"msi", VCe, 3, "-e+ism", "+ist"},
|
|
{"noitacif", i_to_y, 6, "-y+ication", ""},
|
|
{"noitazi", ize, 5, "-e+ation", ""},
|
|
{"rota", tion, 2, "-e+or", ""},
|
|
{"noit", tion, 3, "-e+ion", "+ion"},
|
|
{"naino", an, 3, "", "+ian"},
|
|
{"na", an, 1, "", "+n"},
|
|
{"evit", tion, 3, "-e+ive", "+ive"},
|
|
{"ezi", CCe, 3, "-e+ize", "+ize"},
|
|
{"pihs", strip, 4, "", "+ship"},
|
|
{"dooh", ily, 4, "-y+hood", "+hood"},
|
|
{"ekil", strip, 4, "", "+like"},
|
|
{ NULL }
|
|
};
|
|
|
|
/*
 * Prefixes that lookuppref() may strip, tried in table order.
 * The list is NULL-terminated.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore", "hyper",
	"intra", "inter", "iso", "kilo", "magneto", "meta", "micro",
	"milli", "mis", "mono", "multi", "non", "out", "over",
	"photo", "poly", "pre", "pseudo", "re", "semi", "stereo",
	"sub", "super", "thermo", "ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
|
|
|
|
/*
 * One word list given on the command line.  main() fills in the
 * descriptor and the start/end of the mapped file; the array is
 * terminated by an entry whose fd is -1.
 */
struct wlist {
	int		 fd;	/* open descriptor, -1 marks end of array */
	unsigned char	*front;	/* first byte of the mapping */
	unsigned char	*back;	/* one past the last byte of the mapping */
};

struct wlist *wlists;
|
|
|
|
int vflag;
|
|
int xflag;
|
|
char word[LINE_MAX];
|
|
char original[LINE_MAX];
|
|
char *deriv[40];
|
|
char affix[40];
|
|
|
|
/*
|
|
* The spellprog utility accepts a newline-delimited list of words
|
|
* on stdin. For arguments it expects the path to a word list and
|
|
* the path to a file in which to store found words.
|
|
*
|
|
* In normal usage, spell is called twice. The first time it is
|
|
* called with a stop list to flag commonly misspelled words. The
|
|
* remaining words are then passed to spell again, this time with
|
|
* the dictionary file as the first (non-flag) argument.
|
|
*
|
|
* Unlike historic versions of spellprog, this one does not use
|
|
* hashed files. Instead it simply requires that files be sorted
|
|
* lexicographically and uses the same algorithm as the look utility.
|
|
*
|
|
* Note that spellprog should be called via the spell shell script
|
|
* and is not meant to be invoked directly by the user.
|
|
*/
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
char *ep, *cp, *dp;
|
|
char *outfile;
|
|
int ch, fold, i;
|
|
struct stat sb;
|
|
FILE *file, *found;
|
|
|
|
if (pledge("stdio rpath wpath cpath", NULL) == -1)
|
|
err(1, "pledge");
|
|
|
|
outfile = NULL;
|
|
while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
|
|
switch (ch) {
|
|
case 'b':
|
|
/* Use British dictionary and convert ize -> ise. */
|
|
ise();
|
|
break;
|
|
case 'o':
|
|
outfile = optarg;
|
|
break;
|
|
case 'v':
|
|
/* Also write derivations to "found" file. */
|
|
vflag = 1;
|
|
break;
|
|
case 'x':
|
|
/* Print plausible stems to stdout. */
|
|
xflag = 1;
|
|
break;
|
|
default:
|
|
usage();
|
|
}
|
|
|
|
}
|
|
argc -= optind;
|
|
argv += optind;
|
|
if (argc < 1)
|
|
usage();
|
|
|
|
/* Open and mmap the word/stop lists. */
|
|
if ((wlists = calloc(sizeof(struct wlist), (argc + 1))) == NULL)
|
|
err(1, "malloc");
|
|
for (i = 0; argc--; i++) {
|
|
wlists[i].fd = open(argv[i], O_RDONLY);
|
|
if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
|
|
err(1, "%s", argv[i]);
|
|
if (sb.st_size > SIZE_MAX)
|
|
errc(1, EFBIG, "%s", argv[i]);
|
|
wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
|
|
MAP_PRIVATE, wlists[i].fd, (off_t)0);
|
|
if (wlists[i].front == MAP_FAILED)
|
|
err(1, "%s", argv[i]);
|
|
wlists[i].back = wlists[i].front + sb.st_size;
|
|
}
|
|
wlists[i].fd = -1;
|
|
|
|
/* Open file where found words are to be saved. */
|
|
if (outfile == NULL)
|
|
found = NULL;
|
|
else if ((found = fopen(outfile, "w")) == NULL)
|
|
err(1, "cannot open %s", outfile);
|
|
|
|
for (;; print_word(file)) {
|
|
affix[0] = '\0';
|
|
file = found;
|
|
for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
|
|
if (ep - word == sizeof(word) - 1) {
|
|
*ep = '\0';
|
|
warnx("word too long (%s)", word);
|
|
while ((ch = getchar()) != '\n')
|
|
; /* slurp until EOL */
|
|
}
|
|
if (ch == EOF) {
|
|
if (found != NULL)
|
|
fclose(found);
|
|
return (0);
|
|
}
|
|
}
|
|
for (cp = word, dp = original; cp < ep; )
|
|
*dp++ = *cp++;
|
|
*dp = '\0';
|
|
fold = 0;
|
|
for (cp = word; cp < ep; cp++)
|
|
if (islower((unsigned char)*cp))
|
|
goto lcase;
|
|
if (trypref(ep, ".", 0))
|
|
continue;
|
|
++fold;
|
|
for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
|
|
*dp = tolower((unsigned char)*cp);
|
|
lcase:
|
|
if (trypref(ep, ".", 0) || suffix(ep, 0))
|
|
continue;
|
|
if (isupper((unsigned char)word[0])) {
|
|
for (cp = original, dp = word; (*dp = *cp++); dp++) {
|
|
if (fold)
|
|
*dp = tolower((unsigned char)*dp);
|
|
}
|
|
word[0] = tolower((unsigned char)word[0]);
|
|
goto lcase;
|
|
}
|
|
file = stdout;
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
print_word(FILE *f)
|
|
{
|
|
|
|
if (f != NULL) {
|
|
if (vflag && affix[0] != '\0' && affix[0] != '.')
|
|
fprintf(f, "%s\t%s\n", affix, original);
|
|
else
|
|
fprintf(f, "%s\n", original);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* For each matching suffix in suftab, call the function associated
|
|
* with that suffix (p1 and p2).
|
|
*/
|
|
int
|
|
suffix(char *ep, int lev)
|
|
{
|
|
struct suftab *t;
|
|
char *cp, *sp;
|
|
|
|
lev += DLEV;
|
|
deriv[lev] = deriv[lev-1] = 0;
|
|
for (t = suftab; (sp = t->suf); t++) {
|
|
cp = ep;
|
|
while (*sp) {
|
|
if (*--cp != *sp++)
|
|
goto next;
|
|
}
|
|
for (sp = cp; --sp >= word && !vowel(*sp);)
|
|
; /* nothing */
|
|
if (sp < word)
|
|
return (0);
|
|
if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1))
|
|
return (1);
|
|
if (t->p2 != NULL) {
|
|
deriv[lev] = deriv[lev+1] = 0;
|
|
return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev));
|
|
}
|
|
return (0);
|
|
next: ;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
/*
 * Suffix handler that never accepts: all arguments are ignored.
 */
int
nop(char *ep, char *d, char *a, int lev)
{
	(void)ep;
	(void)d;
	(void)a;
	(void)lev;

	return (0);
}
|
|
|
|
/*
 * Accept the stem ending at ep if trypref() finds it (recording "a" as
 * its derivation) or, failing that, if suffix() can reduce it further.
 * The "d" argument is unused here.
 */
int
strip(char *ep, char *d, char *a, int lev)
{
	if (trypref(ep, a, lev))
		return (1);
	return (suffix(ep, lev) != 0);
}
|
|
|
|
int
|
|
s(char *ep, char *d, char *a, int lev)
|
|
{
|
|
|
|
if (lev > DLEV + 1)
|
|
return (0);
|
|
if (*ep == 's' && ep[-1] == 's')
|
|
return (0);
|
|
return (strip(ep, d, a, lev));
|
|
}
|
|
|
|
int
|
|
an(char *ep, char *d, char *a, int lev)
|
|
{
|
|
|
|
if (!isupper((unsigned char)*word)) /* must be proper name */
|
|
return (0);
|
|
return (trypref(ep,a,lev));
|
|
}
|
|
|
|
/*
 * Overwrite the character at ep with 'e' and try the stem that ends
 * just after it, using "d" as the derivation string.  The overwritten
 * character is not restored.
 */
int
ize(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'e';
	return (strip(ep + 1, "", d, lev));
}
|
|
|
|
/*
 * Temporarily replace the character at ep with 'e' and try the stem
 * ending just after it, with "d" as the derivation string.  On failure
 * the original character is put back.
 */
int
y_to_e(char *ep, char *d, char *a, int lev)
{
	const char saved = ep[0];

	ep[0] = 'e';
	if (!strip(ep + 1, "", d, lev)) {
		ep[0] = saved;
		return (0);
	}
	return (1);
}
|
|
|
|
int
|
|
ily(char *ep, char *d, char *a, int lev)
|
|
{
|
|
|
|
if (ep[-1] == 'i')
|
|
return (i_to_y(ep, d, a, lev));
|
|
else
|
|
return (strip(ep, d, a, lev));
|
|
}
|
|
|
|
int
|
|
ncy(char *ep, char *d, char *a, int lev)
|
|
{
|
|
|
|
if (skipv(skipv(ep-1)) < word)
|
|
return (0);
|
|
ep[-1] = 't';
|
|
return (strip(ep, d, a, lev));
|
|
}
|
|
|
|
/*
 * Store 'l' at ep and continue with y_to_e() on the following
 * character.
 */
int
bility(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'l';
	return (y_to_e(ep + 1, d, a, lev));
}
|
|
|
|
/*
 * If the stem ends in 'i', change that letter to 'y' and use "d" as
 * the derivation string; otherwise keep the stem and use "a".  Either
 * way the result is handed to strip().
 */
int
i_to_y(char *ep, char *d, char *a, int lev)
{
	char *how = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		how = d;
	}
	return (strip(ep, "", how, lev));
}
|
|
|
|
int
|
|
es(char *ep, char *d, char *a, int lev)
|
|
{
|
|
|
|
if (lev > DLEV)
|
|
return (0);
|
|
|
|
switch (ep[-1]) {
|
|
default:
|
|
return (0);
|
|
case 'i':
|
|
return (i_to_y(ep, d, a, lev));
|
|
case 's':
|
|
case 'h':
|
|
case 'z':
|
|
case 'x':
|
|
return (strip(ep, d, a, lev));
|
|
}
|
|
}
|
|
|
|
/*
 * Overwrite the two characters before ep with "er" and try strip().
 */
int
metry(char *ep, char *d, char *a, int lev)
{
	memcpy(ep - 2, "er", 2);
	return (strip(ep, d, a, lev));
}
|
|
|
|
/*
 * Dispatch on the character two before ep: 'c' or 'r' tries the stem
 * with trypref(), 'a' tries y_to_e(), anything else is rejected.
 */
int
tion(char *ep, char *d, char *a, int lev)
{
	const char key = ep[-2];

	if (key == 'c' || key == 'r')
		return (trypref(ep, a, lev));
	if (key == 'a')
		return (y_to_e(ep, d, a, lev));
	return (0);
}
|
|
|
|
/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last two characters of the stem this either tries
 * y_to_e(), rejects the word, or falls back to VCe().
 */
int
CCe(char *ep, char *d, char *a, int lev)
{
	const char last = ep[-1];
	const char prev = ep[-2];
	const int is_s = (last == 's');
	const int is_cg = (last == 'c' || last == 'g');
	const int is_vz = (last == 'v' || last == 'z');
	const int is_u = (last == 'u');

	if (last == 'l') {
		/* consonant + 'l', except "ll", "rl" and "wl" */
		if (!vowel(prev) && prev != 'l' && prev != 'r' && prev != 'w')
			return (y_to_e(ep, d, a, lev));
		return (VCe(ep, d, a, lev));
	}

	if (!is_s && !is_cg && !is_vz && !is_u)
		return (VCe(ep, d, a, lev));

	if (is_s && prev == 's')
		return (VCe(ep, d, a, lev));
	if ((is_s || is_cg) && *ep == 'a')
		return (0);
	if (!is_u && vowel(prev))
		return (VCe(ep, d, a, lev));

	if (y_to_e(ep, d, a, lev))
		return (1);
	/* re-read the stem: the attempt above may have edited it */
	if (!(ep[-2] == 'n' && ep[-1] == 'g'))
		return (0);
	return (VCe(ep, d, a, lev));
}
|
|
|
|
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' is rejected.  For a stem ending vowel +
 * non-vowel, first try it with an 'e' appended (restoring the
 * overwritten character on failure); finally fall back to strip().
 */
int
VCe(char *ep, char *d, char *a, int lev)
{
	const char last = ep[-1];
	char saved;

	if (last == 'e')
		return (0);
	if (!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return (1);
		ep[0] = saved;
	}
	return (strip(ep, d, a, lev));
}
|
|
|
|
char *
|
|
lookuppref(char **wp, char *ep)
|
|
{
|
|
char **sp;
|
|
char *bp,*cp;
|
|
|
|
for (sp = preftab; *sp; sp++) {
|
|
bp = *wp;
|
|
for (cp = *sp; *cp; cp++, bp++) {
|
|
if (tolower((unsigned char)*bp) != *cp)
|
|
goto next;
|
|
}
|
|
for (cp = bp; cp < ep; cp++) {
|
|
if (vowel(*cp)) {
|
|
*wp = bp;
|
|
return (*sp);
|
|
}
|
|
}
|
|
next: ;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* If the word is not in the dictionary, try stripping off prefixes
|
|
* until the word is found or we run out of prefixes to check.
|
|
*/
|
|
int
|
|
trypref(char *ep, char *a, int lev)
|
|
{
|
|
char *cp;
|
|
char *bp;
|
|
char *pp;
|
|
int val = 0;
|
|
char space[20];
|
|
|
|
deriv[lev] = a;
|
|
if (tryword(word, ep, lev))
|
|
return (1);
|
|
bp = word;
|
|
pp = space;
|
|
deriv[lev+1] = pp;
|
|
while ((cp = lookuppref(&bp, ep))) {
|
|
*pp++ = '+';
|
|
while ((*pp = *cp++))
|
|
pp++;
|
|
if (tryword(bp, ep, lev+1)) {
|
|
val = 1;
|
|
break;
|
|
}
|
|
if (pp - space >= sizeof(space))
|
|
return (0);
|
|
}
|
|
deriv[lev+1] = deriv[lev+2] = 0;
|
|
return (val);
|
|
}
|
|
|
|
int
|
|
tryword(char *bp, char *ep, int lev)
|
|
{
|
|
int i, j;
|
|
char duple[3];
|
|
|
|
if (ep-bp <= 1)
|
|
return (0);
|
|
if (vowel(*ep) && monosyl(bp, ep))
|
|
return (0);
|
|
|
|
i = dict(bp, ep);
|
|
if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) {
|
|
ep--;
|
|
deriv[++lev] = duple;
|
|
duple[0] = '+';
|
|
duple[1] = *ep;
|
|
duple[2] = '\0';
|
|
i = dict(bp, ep);
|
|
}
|
|
if (vflag == 0 || i == 0)
|
|
return (i);
|
|
|
|
/* Also tack on possible derivations. (XXX - warn on truncation?) */
|
|
for (j = lev; j > 0; j--) {
|
|
if (deriv[j])
|
|
strlcat(affix, deriv[j], sizeof(affix));
|
|
}
|
|
return (i);
|
|
}
|
|
|
|
/*
 * Return 1 if [bp, ep) is at least two characters long, ends in a
 * vowel followed by a non-vowel other than 'x' or 'w', and has no
 * other vowel in front of that pair; return 0 otherwise.
 */
int
monosyl(char *bp, char *ep)
{
	char *p;
	char last;

	if (ep < bp + 2)
		return (0);

	last = ep[-1];
	if (vowel(last))
		return (0);
	if (!vowel(ep[-2]))
		return (0);
	if (last == 'x' || last == 'w')
		return (0);

	for (p = ep - 3; p >= bp; p--) {
		if (vowel(*p))
			return (0);
	}
	return (1);
}
|
|
|
|
char *
|
|
skipv(char *s)
|
|
{
|
|
|
|
if (s >= word && vowel(*s))
|
|
s--;
|
|
while (s >= word && !vowel(*s))
|
|
s--;
|
|
return (s);
|
|
}
|
|
|
|
/*
 * Return 1 if c, ignoring case, is one of a, e, i, o, u or y;
 * 0 otherwise.
 */
int
vowel(unsigned char c)
{
	static const char vowels[] = "aeiouy";

	/* search only the six letters, not the terminating NUL */
	return (memchr(vowels, tolower(c), sizeof(vowels) - 1) != NULL);
}
|
|
|
|
/*
|
|
* Crummy way to Britishise.
|
|
*/
|
|
void
|
|
ise(void)
|
|
{
|
|
struct suftab *tab;
|
|
|
|
for (tab = suftab; tab->suf; tab++) {
|
|
/* Assume that suffix will contain 'z' if a1 or d1 do */
|
|
if (strchr(tab->suf, 'z')) {
|
|
tab->suf = estrdup(tab->suf);
|
|
ztos(tab->suf);
|
|
if (strchr(tab->d1, 'z')) {
|
|
tab->d1 = estrdup(tab->d1);
|
|
ztos(tab->d1);
|
|
}
|
|
if (strchr(tab->a1, 'z')) {
|
|
tab->a1 = estrdup(tab->a1);
|
|
ztos(tab->a1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Replace every 'z' in the NUL-terminated string s with 's', in place.
 */
void
ztos(char *s)
{
	char *hit;

	for (hit = strchr(s, 'z'); hit != NULL; hit = strchr(hit + 1, 'z'))
		*hit = 's';
}
|
|
|
|
/*
 * strdup() that does not return on failure: exits through err()
 * instead of handing NULL back to the caller.
 */
char *
estrdup(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		err(1, "strdup");
	return (copy);
}
|
|
|
|
/*
|
|
* Look up a word in the dictionary.
|
|
* Returns 1 if found, 0 if not.
|
|
*/
|
|
int
|
|
dict(char *bp, char *ep)
|
|
{
|
|
char c;
|
|
int i, rval;
|
|
|
|
c = *ep;
|
|
*ep = '\0';
|
|
if (xflag)
|
|
printf("=%s\n", bp);
|
|
for (i = rval = 0; wlists[i].fd != -1; i++) {
|
|
if ((rval = look((unsigned char *)bp, wlists[i].front,
|
|
wlists[i].back)) == 1)
|
|
break;
|
|
}
|
|
*ep = c;
|
|
return (rval);
|
|
}
|
|
|
|
static void __dead
|
|
usage(void)
|
|
{
|
|
extern char *__progname;
|
|
|
|
fprintf(stderr, "usage: %s [-bvx] [-o found-words] word-list ...\n",
|
|
__progname);
|
|
exit(1);
|
|
}
|