mirror of
https://github.com/openbsd/src.git
synced 2025-01-10 06:47:55 -08:00
UTF-8 support: use wcwidth(3) when calculating column widths;
written during g2k18; no objection when shown on tech@
This commit is contained in:
parent
925939b706
commit
94b41d461e
@ -1,5 +1,6 @@
|
||||
# $OpenBSD: Makefile,v 1.3 1997/09/21 11:49:24 deraadt Exp $
|
||||
# $OpenBSD: Makefile,v 1.4 2018/07/29 11:27:14 schwarze Exp $
|
||||
|
||||
PROG= lam
|
||||
SRCS= lam.c utf8.c
|
||||
|
||||
.include <bsd.prog.mk>
|
||||
|
@ -1,4 +1,4 @@
|
||||
.\" $OpenBSD: lam.1,v 1.9 2016/01/04 23:21:28 schwarze Exp $
|
||||
.\" $OpenBSD: lam.1,v 1.10 2018/07/29 11:27:14 schwarze Exp $
|
||||
.\" $NetBSD: lam.1,v 1.4 2002/02/08 01:36:25 ross Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 1993
|
||||
@ -30,7 +30,7 @@
|
||||
.\"
|
||||
.\" @(#)lam.1 8.1 (Berkeley) 6/6/93
|
||||
.\"
|
||||
.Dd $Mdocdate: January 4 2016 $
|
||||
.Dd $Mdocdate: July 29 2018 $
|
||||
.Dt LAM 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -74,8 +74,8 @@ is the minimum field width and
|
||||
the maximum field width.
|
||||
If
|
||||
.Ar min
|
||||
begins with a zero, zeros will be added to make up the field width,
|
||||
and if it begins with a
|
||||
begins with a zero, zeros will be prepended to make up the field width
|
||||
instead of blanks, and if it begins with a
|
||||
.Sq \&- ,
|
||||
the fragment will be left-adjusted
|
||||
within the field.
|
||||
@ -98,6 +98,22 @@ The newline normally appended to each output line is omitted.
|
||||
.Pp
|
||||
To print files simultaneously for easy viewing use
|
||||
.Xr pr 1 .
|
||||
.Sh ENVIRONMENT
|
||||
.Bl -tag -width LC_CTYPE
|
||||
.It Ev LC_CTYPE
|
||||
The character encoding
|
||||
.Xr locale 1 .
|
||||
It determines the display widths of characters used by the
|
||||
.Fl f
|
||||
and
|
||||
.Fl p
|
||||
options.
|
||||
If unset or set to
|
||||
.Qq C ,
|
||||
.Qq POSIX ,
|
||||
or an unsupported value, each byte is regarded as a character
|
||||
of display width 1.
|
||||
.El
|
||||
.Sh EXAMPLES
|
||||
Join four files together along each line:
|
||||
.Pp
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: lam.c,v 1.21 2018/07/11 11:42:17 schwarze Exp $ */
|
||||
/* $OpenBSD: lam.c,v 1.22 2018/07/29 11:27:14 schwarze Exp $ */
|
||||
/* $NetBSD: lam.c,v 1.2 1994/11/14 20:27:42 jtc Exp $ */
|
||||
|
||||
/*-
|
||||
@ -39,6 +39,7 @@
|
||||
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -48,11 +49,13 @@
|
||||
|
||||
struct openfile { /* open file structure */
|
||||
FILE *fp; /* file pointer */
|
||||
int minwidth; /* pad this column to this width */
|
||||
int maxwidth; /* truncate this column */
|
||||
short eof; /* eof flag */
|
||||
short pad; /* pad flag for missing columns */
|
||||
char eol; /* end of line character */
|
||||
char align; /* '0' for zero fill, '-' for left align */
|
||||
char *sepstring; /* string to print before each line */
|
||||
char *format; /* printf(3) style string spec. */
|
||||
} input[NOFILE_MAX + 1]; /* last one is for the last -s arg. */
|
||||
#define INPUTSIZE sizeof(input) / sizeof(*input)
|
||||
|
||||
@ -61,6 +64,8 @@ int nofinalnl; /* normally append \n to each output line */
|
||||
char line[BIGBUFSIZ];
|
||||
char *linep;
|
||||
|
||||
int mbswidth_truncate(char *, int); /* utf8.c */
|
||||
|
||||
void usage(void);
|
||||
char *gatherline(struct openfile *);
|
||||
void getargs(int, char *[]);
|
||||
@ -71,6 +76,8 @@ main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
|
||||
setlocale(LC_CTYPE, "");
|
||||
|
||||
if (pledge("stdio rpath", NULL) == -1)
|
||||
err(1, "pledge");
|
||||
|
||||
@ -106,9 +113,9 @@ void
|
||||
getargs(int argc, char *argv[])
|
||||
{
|
||||
struct openfile *ip = input;
|
||||
char *p;
|
||||
const char *errstr;
|
||||
char *p, *q;
|
||||
int ch, P, S, F, T;
|
||||
size_t siz;
|
||||
|
||||
P = S = F = T = 0; /* capitalized options */
|
||||
while (optind < argc) {
|
||||
@ -120,17 +127,28 @@ getargs(int argc, char *argv[])
|
||||
case 'F': case 'f':
|
||||
F = (ch == 'F');
|
||||
/* Validate format string argument. */
|
||||
for (p = optarg; *p != '\0'; p++)
|
||||
if (!isdigit((unsigned char)*p) &&
|
||||
*p != '.' && *p != '-')
|
||||
errx(1, "%s: invalid width specified",
|
||||
optarg);
|
||||
/* '%' + width + 's' + '\0' */
|
||||
siz = p - optarg + 3;
|
||||
if ((p = realloc(ip->format, siz)) == NULL)
|
||||
err(1, NULL);
|
||||
snprintf(p, siz, "%%%ss", optarg);
|
||||
ip->format = p;
|
||||
p = optarg;
|
||||
if (*p == '0' || *p == '-')
|
||||
ip->align = *p++;
|
||||
else
|
||||
ip->align = ' ';
|
||||
if ((q = strchr(p, '.')) != NULL)
|
||||
*q++ = '\0';
|
||||
if (*p != '\0') {
|
||||
ip->minwidth = strtonum(p, 1, INT_MAX,
|
||||
&errstr);
|
||||
if (errstr != NULL)
|
||||
errx(1, "minimum width is %s: %s",
|
||||
errstr, p);
|
||||
}
|
||||
if (q != NULL) {
|
||||
ip->maxwidth = strtonum(q, 1, INT_MAX,
|
||||
&errstr);
|
||||
if (errstr != NULL)
|
||||
errx(1, "maximum width is %s: %s",
|
||||
errstr, q);
|
||||
} else
|
||||
ip->maxwidth = INT_MAX;
|
||||
break;
|
||||
case 'S': case 's':
|
||||
S = (ch == 'S');
|
||||
@ -157,10 +175,16 @@ getargs(int argc, char *argv[])
|
||||
ip->pad = P;
|
||||
if (ip->sepstring == NULL)
|
||||
ip->sepstring = S ? (ip-1)->sepstring : "";
|
||||
if (ip->format == NULL)
|
||||
ip->format = (P || F) ? (ip-1)->format : "%s";
|
||||
if (ip->eol == '\0')
|
||||
ip->eol = T ? (ip-1)->eol : '\n';
|
||||
if (ip->align == '\0') {
|
||||
if (F || P) {
|
||||
ip->align = (ip-1)->align;
|
||||
ip->minwidth = (ip-1)->minwidth;
|
||||
ip->maxwidth = (ip-1)->maxwidth;
|
||||
} else
|
||||
ip->maxwidth = INT_MAX;
|
||||
}
|
||||
ip++;
|
||||
optind++;
|
||||
break;
|
||||
@ -179,14 +203,14 @@ pad(struct openfile *ip)
|
||||
{
|
||||
size_t n;
|
||||
char *lp = linep;
|
||||
int i = 0;
|
||||
|
||||
n = strlcpy(lp, ip->sepstring, line + sizeof(line) - lp);
|
||||
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
|
||||
if (ip->pad) {
|
||||
n = snprintf(lp, line + sizeof(line) - lp, ip->format, "");
|
||||
if (n > 0)
|
||||
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
|
||||
}
|
||||
if (ip->pad)
|
||||
while (i++ < ip->minwidth && lp + 1 < line + sizeof(line))
|
||||
*lp++ = ' ';
|
||||
*lp = '\0';
|
||||
return (lp);
|
||||
}
|
||||
|
||||
@ -202,7 +226,7 @@ gatherline(struct openfile *ip)
|
||||
char *p;
|
||||
char *lp = linep;
|
||||
char *end = s + BUFSIZ - 1;
|
||||
int c;
|
||||
int c, width;
|
||||
|
||||
if (ip->eof)
|
||||
return (pad(ip));
|
||||
@ -220,9 +244,16 @@ gatherline(struct openfile *ip)
|
||||
numfiles++;
|
||||
n = strlcpy(lp, ip->sepstring, line + sizeof(line) - lp);
|
||||
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
|
||||
n = snprintf(lp, line + sizeof(line) - lp, ip->format, s);
|
||||
if (n > 0)
|
||||
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
|
||||
width = mbswidth_truncate(s, ip->maxwidth);
|
||||
if (ip->align != '-')
|
||||
while (width++ < ip->minwidth && lp + 1 < line + sizeof(line))
|
||||
*lp++ = ip->align;
|
||||
n = strlcpy(lp, s, line + sizeof(line) - lp);
|
||||
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
|
||||
if (ip->align == '-')
|
||||
while (width++ < ip->minwidth && lp + 1 < line + sizeof(line))
|
||||
*lp++ = ' ';
|
||||
*lp = '\0';
|
||||
return (lp);
|
||||
}
|
||||
|
||||
|
47
usr.bin/lam/utf8.c
Normal file
47
usr.bin/lam/utf8.c
Normal file
@ -0,0 +1,47 @@
|
||||
/* $OpenBSD: utf8.c,v 1.1 2018/07/29 11:27:15 schwarze Exp $ */
|
||||
/*
|
||||
* Copyright (c) 2018 Ingo Schwarze <schwarze@openbsd.org>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/*
 * Measure the display width of the multibyte string mbs, decoding it
 * with mbtowc(3), and truncate it in place to at most maxwidth columns.
 *
 * Each byte that cannot be decoded and each character for which
 * wcwidth(3) reports a negative width (non-printable) counts as
 * width 1.  If the next character would make the total exceed
 * maxwidth, a NUL byte is written at the start of that character
 * and measuring stops.
 *
 * Return the total display width of what remains in mbs.
 */
int
mbswidth_truncate(char *mbs, int maxwidth)
{
	wchar_t wc;
	int len, width, sum;

	sum = 0;
	while (*mbs != '\0') {
		len = mbtowc(&wc, mbs, MB_CUR_MAX);
		if (len == -1) {
			/*
			 * Invalid or incomplete sequence: reset the
			 * conversion state so the failure cannot leak
			 * into the decoding of the following bytes,
			 * then treat this single byte as one column.
			 */
			(void)mbtowc(NULL, NULL, 0);
			len = width = 1;
		} else if ((width = wcwidth(wc)) < 0)
			width = 1;

		/*
		 * Same test as "sum + width > maxwidth", but written
		 * as a subtraction so that it cannot overflow when
		 * maxwidth is INT_MAX; sum never exceeds maxwidth here.
		 */
		if (width > maxwidth - sum) {
			*mbs = '\0';
			break;
		}
		sum += width;
		mbs += len;
	}
	return sum;
}
|
Loading…
Reference in New Issue
Block a user