1
0
mirror of https://github.com/openbsd/src.git synced 2025-01-10 06:47:55 -08:00

UTF-8 support: use wcwidth(3) when calculating column widths;

written during g218; no objection when shown on tech@
This commit is contained in:
schwarze 2018-07-29 11:27:14 +00:00
parent 925939b706
commit 94b41d461e
4 changed files with 126 additions and 31 deletions

View File

@ -1,5 +1,6 @@
# $OpenBSD: Makefile,v 1.3 1997/09/21 11:49:24 deraadt Exp $
# $OpenBSD: Makefile,v 1.4 2018/07/29 11:27:14 schwarze Exp $
PROG= lam
SRCS= lam.c utf8.c
.include <bsd.prog.mk>

View File

@ -1,4 +1,4 @@
.\" $OpenBSD: lam.1,v 1.9 2016/01/04 23:21:28 schwarze Exp $
.\" $OpenBSD: lam.1,v 1.10 2018/07/29 11:27:14 schwarze Exp $
.\" $NetBSD: lam.1,v 1.4 2002/02/08 01:36:25 ross Exp $
.\"
.\" Copyright (c) 1993
@ -30,7 +30,7 @@
.\"
.\" @(#)lam.1 8.1 (Berkeley) 6/6/93
.\"
.Dd $Mdocdate: January 4 2016 $
.Dd $Mdocdate: July 29 2018 $
.Dt LAM 1
.Os
.Sh NAME
@ -74,8 +74,8 @@ is the minimum field width and
the maximum field width.
If
.Ar min
begins with a zero, zeros will be added to make up the field width,
and if it begins with a
begins with a zero, zeros will be prepended to make up the field width
instead of blanks, and if it begins with a
.Sq \&- ,
the fragment will be left-adjusted
within the field.
@ -98,6 +98,22 @@ The newline normally appended to each output line is omitted.
.Pp
To print files simultaneously for easy viewing use
.Xr pr 1 .
.Sh ENVIRONMENT
.Bl -tag -width LC_CTYPE
.It Ev LC_CTYPE
The character encoding
.Xr locale 1 .
It determines the display widths of characters used by the
.Fl f
and
.Fl p
options.
If unset or set to
.Qq C ,
.Qq POSIX ,
or an unsupported value, each byte is regarded as a character
of display width 1.
.El
.Sh EXAMPLES
Join four files together along each line:
.Pp

View File

@ -1,4 +1,4 @@
/* $OpenBSD: lam.c,v 1.21 2018/07/11 11:42:17 schwarze Exp $ */
/* $OpenBSD: lam.c,v 1.22 2018/07/29 11:27:14 schwarze Exp $ */
/* $NetBSD: lam.c,v 1.2 1994/11/14 20:27:42 jtc Exp $ */
/*-
@ -39,6 +39,7 @@
#include <ctype.h>
#include <err.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -48,11 +49,13 @@
struct openfile { /* open file structure */
FILE *fp; /* file pointer */
int minwidth; /* pad this column to this width */
int maxwidth; /* truncate this column */
short eof; /* eof flag */
short pad; /* pad flag for missing columns */
char eol; /* end of line character */
char align; /* '0' for zero fill, '-' for left align */
char *sepstring; /* string to print before each line */
char *format; /* printf(3) style string spec. */
} input[NOFILE_MAX + 1]; /* last one is for the last -s arg. */
#define INPUTSIZE sizeof(input) / sizeof(*input)
@ -61,6 +64,8 @@ int nofinalnl; /* normally append \n to each output line */
char line[BIGBUFSIZ];
char *linep;
int mbswidth_truncate(char *, int); /* utf8.c */
void usage(void);
char *gatherline(struct openfile *);
void getargs(int, char *[]);
@ -71,6 +76,8 @@ main(int argc, char *argv[])
{
int i;
setlocale(LC_CTYPE, "");
if (pledge("stdio rpath", NULL) == -1)
err(1, "pledge");
@ -106,9 +113,9 @@ void
getargs(int argc, char *argv[])
{
struct openfile *ip = input;
char *p;
const char *errstr;
char *p, *q;
int ch, P, S, F, T;
size_t siz;
P = S = F = T = 0; /* capitalized options */
while (optind < argc) {
@ -120,17 +127,28 @@ getargs(int argc, char *argv[])
case 'F': case 'f':
F = (ch == 'F');
/* Validate format string argument. */
for (p = optarg; *p != '\0'; p++)
if (!isdigit((unsigned char)*p) &&
*p != '.' && *p != '-')
errx(1, "%s: invalid width specified",
optarg);
/* '%' + width + 's' + '\0' */
siz = p - optarg + 3;
if ((p = realloc(ip->format, siz)) == NULL)
err(1, NULL);
snprintf(p, siz, "%%%ss", optarg);
ip->format = p;
p = optarg;
if (*p == '0' || *p == '-')
ip->align = *p++;
else
ip->align = ' ';
if ((q = strchr(p, '.')) != NULL)
*q++ = '\0';
if (*p != '\0') {
ip->minwidth = strtonum(p, 1, INT_MAX,
&errstr);
if (errstr != NULL)
errx(1, "minimum width is %s: %s",
errstr, p);
}
if (q != NULL) {
ip->maxwidth = strtonum(q, 1, INT_MAX,
&errstr);
if (errstr != NULL)
errx(1, "maximum width is %s: %s",
errstr, q);
} else
ip->maxwidth = INT_MAX;
break;
case 'S': case 's':
S = (ch == 'S');
@ -157,10 +175,16 @@ getargs(int argc, char *argv[])
ip->pad = P;
if (ip->sepstring == NULL)
ip->sepstring = S ? (ip-1)->sepstring : "";
if (ip->format == NULL)
ip->format = (P || F) ? (ip-1)->format : "%s";
if (ip->eol == '\0')
ip->eol = T ? (ip-1)->eol : '\n';
if (ip->align == '\0') {
if (F || P) {
ip->align = (ip-1)->align;
ip->minwidth = (ip-1)->minwidth;
ip->maxwidth = (ip-1)->maxwidth;
} else
ip->maxwidth = INT_MAX;
}
ip++;
optind++;
break;
@ -179,14 +203,14 @@ pad(struct openfile *ip)
{
size_t n;
char *lp = linep;
int i = 0;
n = strlcpy(lp, ip->sepstring, line + sizeof(line) - lp);
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
if (ip->pad) {
n = snprintf(lp, line + sizeof(line) - lp, ip->format, "");
if (n > 0)
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
}
if (ip->pad)
while (i++ < ip->minwidth && lp + 1 < line + sizeof(line))
*lp++ = ' ';
*lp = '\0';
return (lp);
}
@ -202,7 +226,7 @@ gatherline(struct openfile *ip)
char *p;
char *lp = linep;
char *end = s + BUFSIZ - 1;
int c;
int c, width;
if (ip->eof)
return (pad(ip));
@ -220,9 +244,16 @@ gatherline(struct openfile *ip)
numfiles++;
n = strlcpy(lp, ip->sepstring, line + sizeof(line) - lp);
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
n = snprintf(lp, line + sizeof(line) - lp, ip->format, s);
if (n > 0)
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
width = mbswidth_truncate(s, ip->maxwidth);
if (ip->align != '-')
while (width++ < ip->minwidth && lp + 1 < line + sizeof(line))
*lp++ = ip->align;
n = strlcpy(lp, s, line + sizeof(line) - lp);
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
if (ip->align == '-')
while (width++ < ip->minwidth && lp + 1 < line + sizeof(line))
*lp++ = ' ';
*lp = '\0';
return (lp);
}

47
usr.bin/lam/utf8.c Normal file
View File

@ -0,0 +1,47 @@
/* $OpenBSD: utf8.c,v 1.1 2018/07/29 11:27:15 schwarze Exp $ */
/*
* Copyright (c) 2018 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdlib.h>
#include <wchar.h>
/*
 * Measure the display width of the multibyte string mbs according to
 * the current LC_CTYPE locale and truncate it in place such that it
 * occupies at most maxwidth display columns.
 *
 * Bytes that do not form a valid character count as one column each,
 * and so do characters for which wcwidth(3) reports a negative width
 * (non-printable characters).
 *
 * If the next character would make the total exceed maxwidth, the
 * string is cut by storing a NUL byte at the start of that character;
 * a character is never split in the middle.
 *
 * Return the total width of what remains in mbs.
 */
int
mbswidth_truncate(char *mbs, int maxwidth)
{
	wchar_t	 wc;
	int	 len, width, sum;

	sum = 0;
	while (*mbs != '\0') {
		if ((len = mbtowc(&wc, mbs, MB_CUR_MAX)) == -1) {
			/*
			 * Return to the initial conversion state, such that
			 * the bad byte cannot affect how later bytes parse.
			 */
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			len = width = 1;
		} else if ((width = wcwidth(wc)) < 0)
			width = 1;
		/*
		 * Compare by subtraction: sum never exceeds a non-negative
		 * maxwidth, so this cannot overflow even for INT_MAX,
		 * whereas sum + width could.
		 */
		if (width > maxwidth - sum) {
			*mbs = '\0';
			break;
		}
		sum += width;
		mbs += len;
	}
	return sum;
}