1
0
mirror of https://github.com/openbsd/src.git synced 2025-01-04 23:35:36 -08:00

Support UTF-8: use wcwidth(3) for column adjustment and replace
non-printable Unicode codepoints and invalid bytes with ASCII
question marks.  No change for the SMALL version.

Using ideas developed by tedu@, phessler@, bentley@ and feedback from many.
OK yasuoka@ czarkoff@ sthen@.
This commit is contained in:
schwarze 2015-12-01 18:36:13 +00:00
parent 062b28b9b1
commit b6203726b0
7 changed files with 85 additions and 16 deletions

View File

@ -1,7 +1,7 @@
# $OpenBSD: Makefile,v 1.7 2003/08/06 19:09:09 tedu Exp $ # $OpenBSD: Makefile,v 1.8 2015/12/01 18:36:13 schwarze Exp $
PROG= ls PROG= ls
SRCS= cmp.c ls.c main.c print.c util.c SRCS= cmp.c ls.c main.c print.c util.c utf8.c
DPADD= ${LIBUTIL} DPADD= ${LIBUTIL}
LDADD= -lutil LDADD= -lutil

View File

@ -1,4 +1,4 @@
/* $OpenBSD: extern.h,v 1.9 2003/06/02 23:32:08 millert Exp $ */ /* $OpenBSD: extern.h,v 1.10 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: extern.h,v 1.5 1995/03/21 09:06:24 cgd Exp $ */ /* $NetBSD: extern.h,v 1.5 1995/03/21 09:06:24 cgd Exp $ */
/*- /*-
@ -45,7 +45,7 @@ int revstatcmp(const FTSENT *, const FTSENT *);
int sizecmp(const FTSENT *, const FTSENT *); int sizecmp(const FTSENT *, const FTSENT *);
int revsizecmp(const FTSENT *, const FTSENT *); int revsizecmp(const FTSENT *, const FTSENT *);
int putname(char *); int mbsprint(const char *, int);
void printcol(DISPLAY *); void printcol(DISPLAY *);
void printacol(DISPLAY *); void printacol(DISPLAY *);
void printlong(DISPLAY *); void printlong(DISPLAY *);

View File

@ -1,4 +1,4 @@
.\" $OpenBSD: ls.1,v 1.72 2015/04/24 10:57:36 sobrado Exp $ .\" $OpenBSD: ls.1,v 1.73 2015/12/01 18:36:13 schwarze Exp $
.\" $NetBSD: ls.1,v 1.14 1995/12/05 02:44:01 jtc Exp $ .\" $NetBSD: ls.1,v 1.14 1995/12/05 02:44:01 jtc Exp $
.\" .\"
.\" Copyright (c) 1980, 1990, 1991, 1993, 1994 .\" Copyright (c) 1980, 1990, 1991, 1993, 1994
@ -33,7 +33,7 @@
.\" .\"
.\" @(#)ls.1 8.7 (Berkeley) 7/29/94 .\" @(#)ls.1 8.7 (Berkeley) 7/29/94
.\" .\"
.Dd $Mdocdate: April 24 2015 $ .Dd $Mdocdate: December 1 2015 $
.Dt LS 1 .Dt LS 1
.Os .Os
.Sh NAME .Sh NAME
@ -440,6 +440,12 @@ If this variable contains a string representing a
decimal integer, it is used as the decimal integer, it is used as the
column position width for displaying column position width for displaying
multiple-text-column output. multiple-text-column output.
.It Ev LC_CTYPE
If set to a string ending in
.Qq .UTF-8 ,
.Nm
respects character display widths when columnating output.
Otherwise, non-ASCII bytes are replaced by question marks.
.It Ev TZ .It Ev TZ
The time zone to use when displaying dates. The time zone to use when displaying dates.
See See

View File

@ -1,4 +1,4 @@
/* $OpenBSD: ls.c,v 1.43 2015/10/09 01:37:06 deraadt Exp $ */ /* $OpenBSD: ls.c,v 1.44 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: ls.c,v 1.18 1996/07/09 09:16:29 mycroft Exp $ */ /* $NetBSD: ls.c,v 1.18 1996/07/09 09:16:29 mycroft Exp $ */
/* /*
@ -48,6 +48,7 @@
#include <string.h> #include <string.h>
#include <unistd.h> #include <unistd.h>
#include <limits.h> #include <limits.h>
#include <locale.h>
#include <util.h> #include <util.h>
#include "ls.h" #include "ls.h"
@ -103,6 +104,10 @@ ls_main(int argc, char *argv[])
int kflag = 0, width = 0; int kflag = 0, width = 0;
char *p; char *p;
#ifndef SMALL
setlocale(LC_CTYPE, "");
#endif
/* Terminal defaults to -Cq, non-terminal defaults to -1. */ /* Terminal defaults to -Cq, non-terminal defaults to -1. */
if (isatty(STDOUT_FILENO)) { if (isatty(STDOUT_FILENO)) {
if ((p = getenv("COLUMNS")) != NULL) if ((p = getenv("COLUMNS")) != NULL)
@ -428,6 +433,7 @@ display(FTSENT *p, FTSENT *list)
ino_t maxinode; ino_t maxinode;
int bcfile, flen, glen, ulen, maxflags, maxgroup, maxuser; int bcfile, flen, glen, ulen, maxflags, maxgroup, maxuser;
int entries, needstats; int entries, needstats;
int width;
char *user, *group, buf[21]; /* 64 bits == 20 digits */ char *user, *group, buf[21]; /* 64 bits == 20 digits */
char nuser[12], ngroup[12]; char nuser[12], ngroup[12];
char *flags = NULL; char *flags = NULL;
@ -474,8 +480,8 @@ display(FTSENT *p, FTSENT *list)
continue; continue;
} }
} }
if (cur->fts_namelen > maxlen) if ((width = mbsprint(cur->fts_name, 0)) > maxlen)
maxlen = cur->fts_namelen; maxlen = width;
if (needstats) { if (needstats) {
sp = cur->fts_statp; sp = cur->fts_statp;
if (sp->st_blocks > maxblock) if (sp->st_blocks > maxblock)

View File

@ -1,4 +1,4 @@
/* $OpenBSD: print.c,v 1.34 2015/03/15 00:41:27 millert Exp $ */ /* $OpenBSD: print.c,v 1.35 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: print.c,v 1.15 1996/12/11 03:25:39 thorpej Exp $ */ /* $NetBSD: print.c,v 1.15 1996/12/11 03:25:39 thorpej Exp $ */
/* /*
@ -122,7 +122,7 @@ printlong(DISPLAY *dp)
printtime(sp->st_ctime); printtime(sp->st_ctime);
else else
printtime(sp->st_mtime); printtime(sp->st_mtime);
(void)putname(p->fts_name); (void)mbsprint(p->fts_name, 1);
if (f_type || (f_typedir && S_ISDIR(sp->st_mode))) if (f_type || (f_typedir && S_ISDIR(sp->st_mode)))
(void)printtype(sp->st_mode); (void)printtype(sp->st_mode);
if (S_ISLNK(sp->st_mode)) if (S_ISLNK(sp->st_mode))
@ -231,7 +231,7 @@ printaname(FTSENT *p, u_long inodefield, u_long sizefield)
if (f_size) if (f_size)
chcnt += printf("%*qd ", chcnt += printf("%*qd ",
(int)sizefield, howmany(sp->st_blocks, blocksize)); (int)sizefield, howmany(sp->st_blocks, blocksize));
chcnt += putname(p->fts_name); chcnt += mbsprint(p->fts_name, 1);
if (f_type || (f_typedir && S_ISDIR(sp->st_mode))) if (f_type || (f_typedir && S_ISDIR(sp->st_mode)))
chcnt += printtype(sp->st_mode); chcnt += printtype(sp->st_mode);
return (chcnt); return (chcnt);
@ -310,7 +310,8 @@ printstream(DISPLAY *dp)
continue; continue;
if (col > 0) { if (col > 0) {
(void)putchar(','), col++; (void)putchar(','), col++;
if (col + 1 + extwidth + p->fts_namelen >= termwidth) if (col + 1 + extwidth + mbsprint(p->fts_name, 0) >=
termwidth)
(void)putchar('\n'), col = 0; (void)putchar('\n'), col = 0;
else else
(void)putchar(' '), col++; (void)putchar(' '), col++;
@ -361,7 +362,7 @@ printlink(FTSENT *p)
} }
path[lnklen] = '\0'; path[lnklen] = '\0';
(void)printf(" -> "); (void)printf(" -> ");
(void)putname(path); (void)mbsprint(path, 1);
} }
static void static void

51
bin/ls/utf8.c Normal file
View File

@ -0,0 +1,51 @@
/* $OpenBSD: utf8.c,v 1.1 2015/12/01 18:36:13 schwarze Exp $ */
/*
* Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef SMALL
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
/*
 * The same flag the SMALL build's mbsprint() consults before it
 * substitutes '?' for a byte; it is defined elsewhere in ls(1).
 */
extern int f_nonprint;

/*
 * Measure, and optionally print, a multibyte string.
 *
 * mbs:   NUL-terminated string, decoded with mbtowc(3) according to
 *        the current LC_CTYPE locale.
 * print: if non-zero, write the string to stdout while measuring;
 *        if zero, only measure.
 *
 * Returns the number of display columns the string occupies.
 * Bytes that cannot be decoded and characters for which wcwidth(3)
 * reports no width each count as one column.  They are written as
 * '?' only when f_nonprint is set; otherwise the original bytes are
 * passed through unchanged, so that names written to a pipe or file
 * are not corrupted.
 */
int
mbsprint(const char *mbs, int print)
{
	wchar_t	  wc;
	int	  len;		/* length in bytes of the current character */
	int	  width;	/* display width of a single Unicode char */
	int	  total_width;	/* display width of the whole string */

	for (total_width = 0; *mbs != '\0'; mbs += len) {
		if ((len = mbtowc(&wc, mbs, MB_CUR_MAX)) == -1) {
			/*
			 * Invalid byte: reset the conversion state and
			 * consume exactly one byte so decoding can resume
			 * at the next one.
			 */
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			if (print)
				putchar(f_nonprint ? '?' :
				    (unsigned char)*mbs);
			total_width++;
			len = 1;
		} else if ((width = wcwidth(wc)) == -1) {
			/* Valid encoding, but not a printable character. */
			if (print) {
				if (f_nonprint)
					putchar('?');
				else
					fwrite(mbs, 1, len, stdout);
			}
			total_width++;
		} else {
			/* Printable character: emit its bytes verbatim. */
			if (print)
				fwrite(mbs, 1, len, stdout);
			total_width += width;
		}
	}
	return total_width;
}
#endif

View File

@ -1,4 +1,4 @@
/* $OpenBSD: util.c,v 1.16 2013/11/21 15:54:45 deraadt Exp $ */ /* $OpenBSD: util.c,v 1.17 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: util.c,v 1.12 1995/09/07 06:43:02 jtc Exp $ */ /* $NetBSD: util.c,v 1.12 1995/09/07 06:43:02 jtc Exp $ */
/* /*
@ -45,15 +45,20 @@
#include "ls.h" #include "ls.h"
#include "extern.h" #include "extern.h"
#ifdef SMALL
int int
putname(char *name) mbsprint(const char *name, int print)
{ {
int len; int len;
if (print == 0)
return strlen(name);
for (len = 0; *name; len++, name++) for (len = 0; *name; len++, name++)
putchar((!isprint((unsigned char)*name) && f_nonprint) ? '?' : *name); putchar((!isprint((unsigned char)*name) && f_nonprint) ? '?' : *name);
return len; return len;
} }
#endif
void void
usage(void) usage(void)