1
0
mirror of https://github.com/openbsd/src.git synced 2025-01-02 22:35:36 -08:00

Support UTF-8: use wcwidth(3) for column adjustment and replace
non-printable Unicode codepoints and invalid bytes with ASCII
question marks.  No change for the SMALL version.

Using ideas developed by tedu@, phessler@, bentley@ and feedback from many.
OK yasuoka@ czarkoff@ sthen@.
This commit is contained in:
schwarze 2015-12-01 18:36:13 +00:00
parent 062b28b9b1
commit b6203726b0
7 changed files with 85 additions and 16 deletions

View File

@ -1,7 +1,7 @@
# $OpenBSD: Makefile,v 1.7 2003/08/06 19:09:09 tedu Exp $
# $OpenBSD: Makefile,v 1.8 2015/12/01 18:36:13 schwarze Exp $
PROG= ls
SRCS= cmp.c ls.c main.c print.c util.c
SRCS= cmp.c ls.c main.c print.c util.c utf8.c
DPADD= ${LIBUTIL}
LDADD= -lutil

View File

@ -1,4 +1,4 @@
/* $OpenBSD: extern.h,v 1.9 2003/06/02 23:32:08 millert Exp $ */
/* $OpenBSD: extern.h,v 1.10 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: extern.h,v 1.5 1995/03/21 09:06:24 cgd Exp $ */
/*-
@ -45,7 +45,7 @@ int revstatcmp(const FTSENT *, const FTSENT *);
int sizecmp(const FTSENT *, const FTSENT *);
int revsizecmp(const FTSENT *, const FTSENT *);
int putname(char *);
int mbsprint(const char *, int);
void printcol(DISPLAY *);
void printacol(DISPLAY *);
void printlong(DISPLAY *);

View File

@ -1,4 +1,4 @@
.\" $OpenBSD: ls.1,v 1.72 2015/04/24 10:57:36 sobrado Exp $
.\" $OpenBSD: ls.1,v 1.73 2015/12/01 18:36:13 schwarze Exp $
.\" $NetBSD: ls.1,v 1.14 1995/12/05 02:44:01 jtc Exp $
.\"
.\" Copyright (c) 1980, 1990, 1991, 1993, 1994
@ -33,7 +33,7 @@
.\"
.\" @(#)ls.1 8.7 (Berkeley) 7/29/94
.\"
.Dd $Mdocdate: April 24 2015 $
.Dd $Mdocdate: December 1 2015 $
.Dt LS 1
.Os
.Sh NAME
@ -440,6 +440,12 @@ If this variable contains a string representing a
decimal integer, it is used as the
column position width for displaying
multiple-text-column output.
.It Ev LC_CTYPE
If set to a string ending in
.Qq .UTF-8 ,
.Nm
respects character display widths when columnating output.
Otherwise, non-ASCII bytes are replaced by question marks.
.It Ev TZ
The time zone to use when displaying dates.
See

View File

@ -1,4 +1,4 @@
/* $OpenBSD: ls.c,v 1.43 2015/10/09 01:37:06 deraadt Exp $ */
/* $OpenBSD: ls.c,v 1.44 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: ls.c,v 1.18 1996/07/09 09:16:29 mycroft Exp $ */
/*
@ -48,6 +48,7 @@
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <locale.h>
#include <util.h>
#include "ls.h"
@ -103,6 +104,10 @@ ls_main(int argc, char *argv[])
int kflag = 0, width = 0;
char *p;
#ifndef SMALL
setlocale(LC_CTYPE, "");
#endif
/* Terminal defaults to -Cq, non-terminal defaults to -1. */
if (isatty(STDOUT_FILENO)) {
if ((p = getenv("COLUMNS")) != NULL)
@ -428,6 +433,7 @@ display(FTSENT *p, FTSENT *list)
ino_t maxinode;
int bcfile, flen, glen, ulen, maxflags, maxgroup, maxuser;
int entries, needstats;
int width;
char *user, *group, buf[21]; /* 64 bits == 20 digits */
char nuser[12], ngroup[12];
char *flags = NULL;
@ -474,8 +480,8 @@ display(FTSENT *p, FTSENT *list)
continue;
}
}
if (cur->fts_namelen > maxlen)
maxlen = cur->fts_namelen;
if ((width = mbsprint(cur->fts_name, 0)) > maxlen)
maxlen = width;
if (needstats) {
sp = cur->fts_statp;
if (sp->st_blocks > maxblock)

View File

@ -1,4 +1,4 @@
/* $OpenBSD: print.c,v 1.34 2015/03/15 00:41:27 millert Exp $ */
/* $OpenBSD: print.c,v 1.35 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: print.c,v 1.15 1996/12/11 03:25:39 thorpej Exp $ */
/*
@ -122,7 +122,7 @@ printlong(DISPLAY *dp)
printtime(sp->st_ctime);
else
printtime(sp->st_mtime);
(void)putname(p->fts_name);
(void)mbsprint(p->fts_name, 1);
if (f_type || (f_typedir && S_ISDIR(sp->st_mode)))
(void)printtype(sp->st_mode);
if (S_ISLNK(sp->st_mode))
@ -231,7 +231,7 @@ printaname(FTSENT *p, u_long inodefield, u_long sizefield)
if (f_size)
chcnt += printf("%*qd ",
(int)sizefield, howmany(sp->st_blocks, blocksize));
chcnt += putname(p->fts_name);
chcnt += mbsprint(p->fts_name, 1);
if (f_type || (f_typedir && S_ISDIR(sp->st_mode)))
chcnt += printtype(sp->st_mode);
return (chcnt);
@ -310,7 +310,8 @@ printstream(DISPLAY *dp)
continue;
if (col > 0) {
(void)putchar(','), col++;
if (col + 1 + extwidth + p->fts_namelen >= termwidth)
if (col + 1 + extwidth + mbsprint(p->fts_name, 0) >=
termwidth)
(void)putchar('\n'), col = 0;
else
(void)putchar(' '), col++;
@ -361,7 +362,7 @@ printlink(FTSENT *p)
}
path[lnklen] = '\0';
(void)printf(" -> ");
(void)putname(path);
(void)mbsprint(path, 1);
}
static void

51
bin/ls/utf8.c Normal file
View File

@ -0,0 +1,51 @@
/* $OpenBSD: utf8.c,v 1.1 2015/12/01 18:36:13 schwarze Exp $ */
/*
* Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef SMALL
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
/*
 * Measure, and optionally print, the multibyte string mbs.
 *
 * The string is decoded one character at a time with mbtowc(3) in the
 * current LC_CTYPE locale.  A byte that cannot be decoded, or a decoded
 * character for which wcwidth(3) reports -1, counts as one column and,
 * when printing, is shown as an ASCII question mark.  Every other
 * character contributes its wcwidth(3) value and, when printing, is
 * copied to stdout unchanged.
 *
 * If print is zero, nothing is written; the string is only measured.
 * Returns the total display width in columns.
 */
int
mbsprint(const char *mbs, int print)
{
	const char	*cp;		/* start of the current character */
	wchar_t		 wc;		/* decoded character */
	int		 nbytes;	/* bytes consumed by this character */
	int		 cols;		/* columns for this character, or -1 */
	int		 sum;		/* columns accumulated so far */

	sum = 0;
	cp = mbs;
	while (*cp != '\0') {
		nbytes = mbtowc(&wc, cp, MB_CUR_MAX);
		if (nbytes == -1) {
			/*
			 * Invalid byte: reset the conversion state, step
			 * over exactly one byte, and treat it like a
			 * character without a usable width.
			 */
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			nbytes = 1;
			cols = -1;
		} else
			cols = wcwidth(wc);

		if (cols == -1) {
			/* Placeholder occupies a single column. */
			if (print)
				putchar('?');
			sum++;
		} else {
			if (print)
				fwrite(cp, 1, nbytes, stdout);
			sum += cols;
		}
		cp += nbytes;
	}
	return sum;
}
#endif

View File

@ -1,4 +1,4 @@
/* $OpenBSD: util.c,v 1.16 2013/11/21 15:54:45 deraadt Exp $ */
/* $OpenBSD: util.c,v 1.17 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: util.c,v 1.12 1995/09/07 06:43:02 jtc Exp $ */
/*
@ -45,15 +45,20 @@
#include "ls.h"
#include "extern.h"
#ifdef SMALL
/*
 * Byte-oriented variant compiled when SMALL is defined.
 *
 * When print is 0, nothing is written and strlen(name) is returned.
 * Otherwise each byte of name is written with putchar(); a byte that
 * fails isprint() is replaced by '?' while f_nonprint is set.  The
 * return value is then the number of bytes processed.
 *
 * NOTE(review): two signature lines (putname and mbsprint) appear
 * below; presumably this is the old and new name of the same function
 * shown together -- confirm against the actual util.c.
 */
int
putname(char *name)
mbsprint(const char *name, int print)
{
int len;
if (print == 0)
return strlen(name);
for (len = 0; *name; len++, name++)
putchar((!isprint((unsigned char)*name) && f_nonprint) ? '?' : *name);
return len;
}
#endif
void
usage(void)