From b6203726b0c2b99029f120541ba7391e510b4b99 Mon Sep 17 00:00:00 2001 From: schwarze Date: Tue, 1 Dec 2015 18:36:13 +0000 Subject: [PATCH] Support UTF-8: use wcwidth(3) for column adjustment and replace non-printable Unicode codepoints and invalid bytes with ASCII question marks. No change for the SMALL version. Using ideas developed by tedu@, phessler@, bentley@ and feedback from many. OK yasuoka@ czarkoff@ sthen@. --- bin/ls/Makefile | 4 ++-- bin/ls/extern.h | 4 ++-- bin/ls/ls.1 | 10 ++++++++-- bin/ls/ls.c | 12 +++++++++--- bin/ls/print.c | 11 ++++++----- bin/ls/utf8.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ bin/ls/util.c | 9 +++++++-- 7 files changed, 85 insertions(+), 16 deletions(-) create mode 100644 bin/ls/utf8.c diff --git a/bin/ls/Makefile b/bin/ls/Makefile index defd6071b95..026ce6d3da4 100644 --- a/bin/ls/Makefile +++ b/bin/ls/Makefile @@ -1,7 +1,7 @@ -# $OpenBSD: Makefile,v 1.7 2003/08/06 19:09:09 tedu Exp $ +# $OpenBSD: Makefile,v 1.8 2015/12/01 18:36:13 schwarze Exp $ PROG= ls -SRCS= cmp.c ls.c main.c print.c util.c +SRCS= cmp.c ls.c main.c print.c util.c utf8.c DPADD= ${LIBUTIL} LDADD= -lutil diff --git a/bin/ls/extern.h b/bin/ls/extern.h index 7f7807a612f..afe3fb47e68 100644 --- a/bin/ls/extern.h +++ b/bin/ls/extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: extern.h,v 1.9 2003/06/02 23:32:08 millert Exp $ */ +/* $OpenBSD: extern.h,v 1.10 2015/12/01 18:36:13 schwarze Exp $ */ /* $NetBSD: extern.h,v 1.5 1995/03/21 09:06:24 cgd Exp $ */ /*- @@ -45,7 +45,7 @@ int revstatcmp(const FTSENT *, const FTSENT *); int sizecmp(const FTSENT *, const FTSENT *); int revsizecmp(const FTSENT *, const FTSENT *); -int putname(char *); +int mbsprint(const char *, int); void printcol(DISPLAY *); void printacol(DISPLAY *); void printlong(DISPLAY *); diff --git a/bin/ls/ls.1 b/bin/ls/ls.1 index 9fe555dc3de..1daaca58f61 100644 --- a/bin/ls/ls.1 +++ b/bin/ls/ls.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: ls.1,v 1.72 2015/04/24 10:57:36 sobrado Exp $ +.\" $OpenBSD: ls.1,v 1.73 
2015/12/01 18:36:13 schwarze Exp $ .\" $NetBSD: ls.1,v 1.14 1995/12/05 02:44:01 jtc Exp $ .\" .\" Copyright (c) 1980, 1990, 1991, 1993, 1994 @@ -33,7 +33,7 @@ .\" .\" @(#)ls.1 8.7 (Berkeley) 7/29/94 .\" -.Dd $Mdocdate: April 24 2015 $ +.Dd $Mdocdate: December 1 2015 $ .Dt LS 1 .Os .Sh NAME @@ -440,6 +440,12 @@ If this variable contains a string representing a decimal integer, it is used as the column position width for displaying multiple-text-column output. +.It Ev LC_CTYPE +If set to a string ending in +.Qq .UTF-8 , +.Nm +respects character display widths when columnating output. +Otherwise, non-ASCII bytes are replaced by question marks. .It Ev TZ The time zone to use when displaying dates. See diff --git a/bin/ls/ls.c b/bin/ls/ls.c index da93dd91e10..6341bfc6fa2 100644 --- a/bin/ls/ls.c +++ b/bin/ls/ls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ls.c,v 1.43 2015/10/09 01:37:06 deraadt Exp $ */ +/* $OpenBSD: ls.c,v 1.44 2015/12/01 18:36:13 schwarze Exp $ */ /* $NetBSD: ls.c,v 1.18 1996/07/09 09:16:29 mycroft Exp $ */ /* @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "ls.h" @@ -103,6 +104,10 @@ ls_main(int argc, char *argv[]) int kflag = 0, width = 0; char *p; +#ifndef SMALL + setlocale(LC_CTYPE, ""); +#endif + /* Terminal defaults to -Cq, non-terminal defaults to -1. 
*/ if (isatty(STDOUT_FILENO)) { if ((p = getenv("COLUMNS")) != NULL) @@ -428,6 +433,7 @@ display(FTSENT *p, FTSENT *list) ino_t maxinode; int bcfile, flen, glen, ulen, maxflags, maxgroup, maxuser; int entries, needstats; + int width; char *user, *group, buf[21]; /* 64 bits == 20 digits */ char nuser[12], ngroup[12]; char *flags = NULL; @@ -474,8 +480,8 @@ display(FTSENT *p, FTSENT *list) continue; } } - if (cur->fts_namelen > maxlen) - maxlen = cur->fts_namelen; + if ((width = mbsprint(cur->fts_name, 0)) > maxlen) + maxlen = width; if (needstats) { sp = cur->fts_statp; if (sp->st_blocks > maxblock) diff --git a/bin/ls/print.c b/bin/ls/print.c index 6709c7b27e3..6af6db3366f 100644 --- a/bin/ls/print.c +++ b/bin/ls/print.c @@ -1,4 +1,4 @@ -/* $OpenBSD: print.c,v 1.34 2015/03/15 00:41:27 millert Exp $ */ +/* $OpenBSD: print.c,v 1.35 2015/12/01 18:36:13 schwarze Exp $ */ /* $NetBSD: print.c,v 1.15 1996/12/11 03:25:39 thorpej Exp $ */ /* @@ -122,7 +122,7 @@ printlong(DISPLAY *dp) printtime(sp->st_ctime); else printtime(sp->st_mtime); - (void)putname(p->fts_name); + (void)mbsprint(p->fts_name, 1); if (f_type || (f_typedir && S_ISDIR(sp->st_mode))) (void)printtype(sp->st_mode); if (S_ISLNK(sp->st_mode)) @@ -231,7 +231,7 @@ printaname(FTSENT *p, u_long inodefield, u_long sizefield) if (f_size) chcnt += printf("%*qd ", (int)sizefield, howmany(sp->st_blocks, blocksize)); - chcnt += putname(p->fts_name); + chcnt += mbsprint(p->fts_name, 1); if (f_type || (f_typedir && S_ISDIR(sp->st_mode))) chcnt += printtype(sp->st_mode); return (chcnt); @@ -310,7 +310,8 @@ printstream(DISPLAY *dp) continue; if (col > 0) { (void)putchar(','), col++; - if (col + 1 + extwidth + p->fts_namelen >= termwidth) + if (col + 1 + extwidth + mbsprint(p->fts_name, 0) >= + termwidth) (void)putchar('\n'), col = 0; else (void)putchar(' '), col++; @@ -361,7 +362,7 @@ printlink(FTSENT *p) } path[lnklen] = '\0'; (void)printf(" -> "); - (void)putname(path); + (void)mbsprint(path, 1); } static void diff --git 
a/bin/ls/utf8.c b/bin/ls/utf8.c new file mode 100644 index 00000000000..3825c531f16 --- /dev/null +++ b/bin/ls/utf8.c @@ -0,0 +1,51 @@ +/* $OpenBSD: utf8.c,v 1.1 2015/12/01 18:36:13 schwarze Exp $ */ + +/* + * Copyright (c) 2015 Ingo Schwarze + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef SMALL +#include +#include +#include /* NOTE(review): the header names of these three #include directives appear to have been lost in extraction; the calls below (putchar/fwrite, mbtowc/MB_CUR_MAX, wcwidth) presumably need stdio.h, stdlib.h and wchar.h -- confirm against the original file. */ +/* mbsprint: walk the NUL-terminated multibyte string mbs one character at a time and return its total display width in columns. When print is non-zero the string is also written to stdout; every byte that mbtowc(3) fails to decode and every character for which wcwidth(3) returns -1 is written as a single question mark and counted as one column. When print is zero nothing is written and only the width is computed. */ +int +mbsprint(const char *mbs, int print) +{ + wchar_t wc; + int len; /* length in bytes of the multibyte character just decoded; forced to 1 for an undecodable byte */ + int width; /* display width of a single Unicode char */ + int total_width; /* display width of the whole string */ + + for (total_width = 0; *mbs != '\0'; mbs += len) { + if ((len = mbtowc(&wc, mbs, MB_CUR_MAX)) == -1) { /* undecodable byte */ + (void)mbtowc(NULL, NULL, MB_CUR_MAX); /* reset the conversion state after the failed decode */ + if (print) + putchar('?'); + total_width++; + len = 1; /* step over exactly one byte and retry decoding at the next one */ + } else if ((width = wcwidth(wc)) == -1) { /* decoded, but wcwidth(3) returned -1 for it */ + if (print) + putchar('?'); + total_width++; + } else { + if (print) + fwrite(mbs, 1, len, stdout); /* emit the original bytes of this character unchanged */ + total_width += width; + } + } + return total_width; +} +#endif diff --git a/bin/ls/util.c b/bin/ls/util.c index 6ba1a7e2cee..d9a0552fe04 100644 --- a/bin/ls/util.c +++ b/bin/ls/util.c @@ -1,4 +1,4 @@ -/* $OpenBSD: util.c,v 1.16 2013/11/21 15:54:45 deraadt Exp $ */ +/* $OpenBSD: util.c,v 1.17 2015/12/01 18:36:13 schwarze Exp $ */ /* $NetBSD: util.c,v 1.12 1995/09/07 06:43:02
jtc Exp $ */ /* @@ -45,15 +45,20 @@ #include "ls.h" #include "extern.h" +#ifdef SMALL int -putname(char *name) +mbsprint(const char *name, int print) { int len; + if (print == 0) + return strlen(name); + for (len = 0; *name; len++, name++) putchar((!isprint((unsigned char)*name) && f_nonprint) ? '?' : *name); return len; } +#endif void usage(void)