From 12a80993e688f242e8a5cee047307533707904af Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Tue, 31 Jul 2001 18:46:51 +0000 Subject: [PATCH] ICU-999 Use a UTF-8 file instead of a UTF-16 file. X-SVN-Rev: 5401 --- icu4c/source/test/thaitest/space.txt | Bin 12578 -> 14951 bytes icu4c/source/test/thaitest/thaitest.cpp | 142 +++++++++++------------- 2 files changed, 63 insertions(+), 79 deletions(-) diff --git a/icu4c/source/test/thaitest/space.txt b/icu4c/source/test/thaitest/space.txt index 374d653756151259e83082f71fdb3ef41a6a0eed..65d89c324847b04d8261e7bf42d672c57482889f 100644 GIT binary patch literal 14951 zcma)@;cgv85{3WgDOjG=iP462$d8p0NC>d85prb2Di8RW<_Z^%6{pY`j>HEX<-530Km_8onufI6V|K#qc!}P~tx;jh~5577~ z&kpmG!^Fiehw1gBx_MEbKOd%7tYpb=wdCDl`kjTjXZ>6@u|9uYfHRey`Rp+LyQYCiI2YyaI`Dsv#W;S? z=$pg*4<<0my5A1-)59dPw}*-4`~x|Ii;<#08;>5zS1m1Y6QmHgalqI$p~WJPT}~_j z1gf!}d84DA!vY+^0(hkO61%Lq@o{_5?#|uI8flL>fgjeDFVaCoK!AIF5i2(zD->zu z5kcerwL#$deGN$lDd=Pbu$f;liTRs*VdGPd!5Ym>KL$iUC$zhQBR+t1vIZTMQW*jV zD5V|{A9gT#_Uk5G19-6nbPb~UJn?TQNru2M_Is^u;$H?H0HT&DM7H0KfoM?KZQ$T) z#tt#`Zc1mE}dh`=aQ0Vd|iQ48R@(x<)0Jjo6rnqZje;h98aDS=&m~26~DlBnsa)?FBXo=hRNt!doYXp#kyo>^KY{Q;Y2OrUWv8^t>ZPN+u~5PS)S3CN2Kye*|xeLY(h z#5`62=6%I;QF@SA9$5nzHXq>BBoSY&2ygUMGAtvF!K&MK|~icls8#N1|AHT&ECw7qW*=h@xR~RY{VK87!fq6@rj9_?V}% zm?Y=M|0?PLKRc%96`oOn6u&_;ndB=&ZqoCV`k;uD2>min%+-4Mm_U(g?3|E$(x7=$ ziV>8j%QY^j=H{A_vQsU`G*_H<+M|+Bq`lAERYcfxy7QAM;r3vBLcv|dygv&7uV;gc zq;k{pqv+&?wER%Xv#0zNcU---BNNDU%gvIBj`AIkphmrLg=46Y`?I=9=lI^h;Od8X ziH?lIEV8Qk;YSx)@;Ch}%rFDCPEp5Wiv1nt?Xnb<;h+!>D%=E=J$!r#S zg^3OxJl}@zu|YO6aaLq~Qd)c35>;%$5X4d(T<9-y(Afte^3)=YKDC5d!1Bo>B*+EJ z4T1P6hdu~11?N0^(KITDUToG}#O|qvxOse-u`pz0_@p(7s1pDWvPqCB3+Qv01C0PM zd|v4l9?P$JfH~s?F}A{&&sK_>1)st7y`q2=EDMkenMM%mz==@ z#oGsim}82d+HGfJoQ}&S&i^R7EaWDbT9VgLX}?4L*4Sq-b&8Hf2e59a?PQ3IuB{T= z-9^b4TDqi&i}dfd>!n~AV%{PPW|>apf)@=!2hz4pjF`D0N9o_jGxMO#RFmrKO-3G0 zWAY_{yVWBUk1Y`tY4~8t0VJJ{%48EJg99lUN)alX2wC&vTh>(`C!e@fM%dp;q%91~=?0~3ndw7KRw&aAOR5sQJB(@2N;*)iCB!6s1gsJ?wVbsc~$CUyyafyjh ziUuA@(mfd!YI%~H73CuBgkC6?W{WM|+ceZa##zsUjVg;zkPmydYJf6Rs+!k3w;FCiEKo&Fe44n$FSkxbL1~>H(F-Gvi}F_kSSIyuDwLq9Ef0#jD@(1a%O1x zs?d)LHrv8L0i^U?(8SSFr6I8f@OP>_<&_4fVU$yx)-u{{O78ZpC^O1ir_ zV*$;LH70$ut}|cCEVYzKolQevj;Fg#YCwCh=82Qz9Aq)QjZ*{KFDn#;%Dd#|v}mw- z7mbqMW_2G3JqWRg|3r(1c`UeZ)OekXAW zl>Obx3*Q@O>@dzCQ8u43vU~|xD{zwpb_Io*5LN`zLt>tkJ5SY2LAxQ%S%3yeri8tr zflduT)Fx|eGn~Kx$bkt7AV>=i>ja)pk4kG@mok`eE@!oAGU=#XW%F2Gp*nIs$SYdY zH}U}mdG4*Nt9LcO`Mza@5g^OC@{4H$f&ZVSPqiiC50#7--*>#)3gftl%CHEPD_ z%pa>3_hGS;=IUO|RqK!|YcO7+S2|4m+WC!cyM3hXJI`$Gk_d;mHJ!(A##UVbcg7dt zHLF-3yK=ZgScH-n9(}ISRk|J#ZJv!cA1MaG-YYBCa%hE4J2FV{0*u?1#BolqDh(}( z4rd2nb}&KYZt=&Wf=@Oq`8i0)mbLm9C1ZX$061kX)-525?~d0K`8_muK}d zf*AYsX1=Nc1i%D4%YIroq{{bT)tmHzq-a4}ig)s4g|G~xylJn19ysxcr*@YUMI8Jy zqSF=)g2=pr*73Fov@{{0Iy*djHM%me@UU(IFd+2K$fyf8R@)-o7Y|Hu!xKI~czxpQ z231*b#c|E^u5)IcVh)tl3QSTRxrP*%T~v0V>Z}|sJTWrT2&J?m{s9PBxaIR5Q!Jr` zi-@IUMI~ZQ0w=+CzruILZ^*qjcD6iBt@d`x=}yvoCr)jTw_C@o?s5VIdHWpOQ4B=+ zLgiCRu~DEQ8wKHTrP`@jzDzbu+*w_)05zVF{1uOr5wPNXc2HvhzK||^XR1M!UDj`W z+Pvc5e)Ur2DVgc*HaYP!zVSplRaRIa_cK2bDbQ$bG@x&iCUDWhfKG<>1%cTvadMSk zJ(2a)zp6>l&TQGi*= zjVZ~Ys4{cQSwUx7CkJ2KZzDTDiz*t-aqn)>qxn!N;K4S`i4R=?$wD0cbD?8Rk6frK z=EJrnl}OY^S)!`68hZ|QeJ`XK<8{kNNT>jD$k~&!%qJ#ZS@nO5Vpd)R%2yj^Kreeq z#BnN99>^07cDs}d2{gXes>;L;C7w^?p#&lW<%#J3HB`3Y7tXQa(5Cw<&ThTx~wDX z<}PmWtSz3^#Kh_wfKzYz7Ny6T8g;uhO_jXtn>|m(u z82U)Dz6*rMQ4xrH?$m$Y)(SJeU82$I8Q@8R7DCCjgFIO!)f?rSrri{ zFY5@=?{%~jzL+pID|GguDCl2SFr_^7(H;n$8I#@GOQAC|Iz15A#tzxa0-pdtIs{{( zCm7+el8W*fdG5WiY$Cx;^sELnDOPPJiv`-P(&G6&CHkt2Vj^V<(*_nzg)7WH!dZn4@4t9`$VtH5LGLuYjAQ2Mb zc0$M_ITF<_9Av{Lz#tb)uJ57*z#>&LzP)iX;@TPhq&b!yD+RnB!-3fa(Hzjv{3;6$ zD!u*V2$uNQ?ebVhEW;K~YstdB~)M(~4bxCc(Iv2Bs%_-z*~+gcLgj`kr4 z*vf>Z+3d7wnH+U7=TWO&iYI!NFbOUp;)p-xkb_keO}9T3=>n~L-h|-?*e)aaH5hBL zJYJ_k0HfPYSSp*tAq}*7syhJ~;lg7}v8XCMMYTD?n@I?x>J5)X$O2N_5jm+9wLVnL^ftDFlK&?yaTV_n2^I;a5Bo=@%ujL*d3^jB&d?N zwbLmai3G^;wTvI_{As z_1OwWu(8B8~<;|$6ky^)~^ zUJx)2N)n=Gh=dyfaMilmc7>J(%t-cti%(NppPqzqw>!k$lMtkKFSwWFGTcWMkwA1p ztm48a+2#|R5Nai33(eqvWUExN^qP~TunwyPlD<*DA;9W!!$LUSy70PxCD~3TV$GIU z=EO!4_oF587@Ck*{n-W?kI)Tqq;O=7$5DzMwPoCOb=(zlZ|Y;qOhSr?a}}!1HYpk% zfxRkU=gGlsHDOS3?Tg|Ss-1Mmx~L~eP%D-$0_LBdg2zrDQ?i|G@?^6-na|D`4Iw8= zV6n{pP(Do?@39I&3U5=^DLLBl&GMBhrN$Y;)btJ9-xXhj6ze20Sqf(7vK-E4=%z$y z)MuZ2>UvS~+AqNpX;P<{Nk7mfa#4m1>A9N$KTrc3Ug=cq{ zhZGykbfqBC@ePNO&Kp? zT<@=kzv}L*g9qLBd3e;lyj~4|Gzn+=`!p=|*-Y!6hGV@tleBN`)$4FSoVNR3CG+aw zS<*jhm&f6*t#};Hb-mI3tXmI%*SFVgw^PZw)n{*dkEYL({*$g3!*_kdy${3Jw(phh zxRySA>Q!EY1^jp3RC@(kfUWUc+&rSyZCQ1*o!cj9m?-jk@^eJ()UJF@JR)R40!E;~r*Sf4F9d8ix zk6K-k+lSitd>5tfNyEzINS{^Ka4C=C$$Rblt9)_1u^P|Zwm46-bax+oY)s=3u$}CG zUpmOc^OjbTf0Z}z>3X;nN}k&O$Oo<4cYToV#v$J9B_@t*SzxR_OK$D=BI(y7vaI_* zHg-q%LrI@Exfl9ty&|`AU@bXB536sr!_!92>flQJmVJ8WyE<5n%yHV~NA^9E#0TXS zSI`40jI)!L#rPDx*2AG}gOlEr@w{IOecu}XK_#;}6MOB0yuiDP6kfD{gX*$}U3x!HGh?Nd z*px_KjJjMj|H3&{WtNIlY2k=p9n?;1)kK&is9r0AusW6Tr-Axow!qfdd?t%6wdy04 zAr%Ey0w=_PY$5}#@1tJb>ND0Mk(iKs@oKcNFQ2v2F~h@rXBK>HYsp`v78_P2yHga9 zpNdhwdKo?`%gNBoas8G2wN$LB$lw#bKkMDYh#YDs)h8KvzR~tsvXJBmziG!Cy)&Lq zT5PG)to@tz!WUGUsRz!q^XHAkxx7QP7SiK(+<&cISo>*=`dlmQO*8A;@Il`e=X-zS z8#b%TXnk2rp8hJUk?Uy9RYem8dmpT@9!7KKK5G@)6}vBF@q;YQN=GalNHwtCu0x3~ z88xlTuyA^mKl8z=?FLKYRXQcmv{rRRUVL8`zlX$RMUVFC0}~_S8q3U*?h>kovG-Bi z*;w~}!6BX9lWsZbRDBNO^MW0a7a zee~R!E+iY2q#nc`{A(6-_mj-#YCd?+^Y|IQv)nr&*xdf0&&& z-g(uK1_q4k!f4iD&9_Y)!h0)a8JNCnSjwJdN8gi2(y@GYtoy5TEB`<1J6N4h=<;0` zgMZf`q~A1c(n*Jd3vb3wIBBNcu+2;p9(Jaq#1YX#VX!!#qhbyp=yQ9amFzh}#R$nW^%i%}anA#Nm`kb0cSKC$oHoBw7v{LmL zqCc&=5$T^~IW|R);(tzqWN$3W9X=!cOFn=}g7{)8XZK3>*h2S+k@9|xuCAwSV=wow zGqEjiG5Ed4%ewK0Tl zwP!Sq-NW09Ev|N0CPBs>49fv%?yjrr0E^!%DU>m^q;2CL})rR^TwryWxD zZ1=4jvfhbW#ol?h7E%+5@K_>64f9UrlU=UrbIV->$a`u`@H(7qVisCTT8_iQGS)G-fr(rkuR`j=2sO0*3Q`514dyb@C^$zjv z_MX>@!*_YY(tLvHy~b>$kLyvZ7S)ArYx!OUGDE@?urTLh*X-0?>NU8p?xFA3=y{G? z2WUK;OY4R14f^htL7+F<6w2d0GrhI*US!Fk_N3o}VX%{|p{L>-GjjZeH=TJtgk6NPbE{+2ao>AV7Jm3>KjBp|=+(VY+pfy-K z95Xdsxoq#FWA9UBxF4&Tn9m`(JNZ3Dmr-A< zsL#s8&;C!(|DGL%+TF}AnY&#Ky-}vv^bqb$Jt;*u$00i98eW3cGR@n>D{vf+R zUf+c}NBdsGJ-h1!Ip%XSS-uwc!x_d9T_OI0N213*6vc1TNZ}PvTGFf4dHGK9VrP0M zb||jknMQcglQF&zChG}EW_W1p*;6z`n|;&azCUEjKbL;DvJksZh2XjBqHCpjGaf%z z9q>~N7>vnDW^dD3X1vh%%$50^=gpI@;E_@BtVvtgCjRSvQa-ene@>lLFXlt zu!UmyZ)phQ{i4|C6hM6-pZYm*p(8rj+aSSAXzA%+ncTQP4hCUjFq3#QwF8Cwp1Q?` zTzA=kj>~h4zAr7=NY2j1!jXq|IrYFGEBS&I=C0h8U2SPRojQ?T)m4@GvFo()mAP*G z7MZyc9V+?P=94J{EcIPY=%WR*zZYSwII@?u9f$XxRg~PU?)jb>Xr5_!Chw_*C)UaA z)PZMpsUW=9K2l>hRtAMe$PW!&tP5x7=?3@aSybfI*>m5CYg_hwoI|C92(R~f6@ONK zd5Q@R$Y9U;gPnQ`5LsybP4~jXt{UW$yQ4!z^HC=uPGq1VR$K^eSlssp^d0o${nG%~ z*?Ja0X6DJ3*(6T~&SYtwH~I+B`}w(OAK9SRJH$HXJ;hGRp2CuQss5StzKIk%!5GBP z-#Z7gCl%Ar_V7#koOBC*Ca}NCP^II;dLCGFN6RSx_pxCrLX2FSqS<|m@sVvd|`KJ8o6dM=ha5gECbZJa}#i_V*RZ||n(XF|D@ zc0MOXPU+;Gna9KE*_WA}&kpeKO#l3Bqp+PBeVufsDD%nvPLU;Rw$D3Z$M`*1sQqnK ze0w8pkORx!=)XVq@LY8<^attTBB8#`GhF-Wvc-6Z=Sk`L4K zb0k#XQC--{Sd%_C{a3BJ&SZ_*BIm5qw6Fe&T;-hm`O>*0?U&$MnpnE#^ScD1GQPg*9ibnb}Xr$mP1z z=s`|S!0rZClFcb5I`8*XG)#n3*>+>=?8)<(dr5~0l98EH!{z<&SdLqTAm8{6flu$# zc|at&f_@bCqf@S7dQePepey}(zIQazZ|$49(x<0i_jxBLe~{#EtUQ<~X3nxb8}+JRLtdT+*!*HY`VK4-2MF^iu< z5UDyB){KTv_aPd*ffDk{b8?-%}D45{u|ow@&!WE+uxXwSsS z%v!a)ei0FViZ;3SDCx1=M_{{`eO3I(`1d=~FauG@PL3G83K{NMH*&yEf1f=M{z1vU zF8fH8gY~gG2)6ZbH+^pABQ>=%ChBu?tLLpmh}nUE^MH5E;@KnJQcm)IS`$tW4(m4? z@w;a(^;CknwQ~ssfVudo=72$6|67Cb0=3ad0d@Frw<`Ai)3oCC(an^R-%M0AazaNk z6IS+!hEoJH%kccl{td_NT3G$Ry*AatPkzn6*@+1GnY1fS{OfGaPRVBc!LRJHdY6CW zYo^~{Wm$eP<&#yPXT0{wArlI^3Xp0x>T~RORbFNmU(}=D>&owWrz;H)Mw;94GyF2O4c?(tyGaNN9TUKh;Um8ZxnmIzw z);U90$IMey*Md6h)@Su>wVw`FCz&}Y41(M@VR@9ZLCaj5U&T1o(UJ< z7e0%D67euG*DsA_sgvY?&A_PmbNSQrV%OYh&FQ-pd|zk&po3~#v0yIzq-U8topeOX z*Cp#sQHf9XO$5ND<6XRSBDv^Z`CE(-j(8C+xW^o2{wUA*Q&-#FNl#^|k?!9jJEqaK z=dzrWU<6p&yY;K+erEDMj8YkE%OWw?L^!A3Y35IP%(Fsc&Fn`mF+2M@_6xSO{7pAQ j=i&b|Azf_g6#o^n_jxiAcjs@Nz<4UtwA+tm+t>dCx{hm$ diff --git a/icu4c/source/test/thaitest/thaitest.cpp b/icu4c/source/test/thaitest/thaitest.cpp index 33c9d7617b8..5d231136109 100644 --- a/icu4c/source/test/thaitest/thaitest.cpp +++ b/icu4c/source/test/thaitest/thaitest.cpp @@ -15,6 +15,7 @@ #include "unicode/uchriter.h" #include "unicode/brkiter.h" #include "unicode/locid.h" +#include "unicode/unistr.h" /* * This program takes a Unicode text file containing Thai text with @@ -269,13 +270,13 @@ UBool ThaiWordbreakTest::compareWordBreaks(const UChar *spaces, int32_t spaceCou */ void ThaiWordbreakTest::breakNotFound(int32_t br) { - if (fVerbose) { - printf("%d ****\n", br); - } else { - fprintf(stderr, "break not found: %d\n", br); - } - - fBreaksNotFound += 1; + if (fVerbose) { + printf("%d ****\n", br); + } else { + fprintf(stderr, "break not found: %d\n", br); + } + + fBreaksNotFound += 1; } /* @@ -284,13 +285,13 @@ void ThaiWordbreakTest::breakNotFound(int32_t br) */ void ThaiWordbreakTest::foundInvalidBreak(int32_t br) { - if (fVerbose) { - printf("**** %d\n", br); - } else { - fprintf(stderr, "found invalid break: %d\n", br); - } - - fInvalidBreaks += 1; + if (fVerbose) { + printf("**** %d\n", br); + } else { + fprintf(stderr, "found invalid break: %d\n", br); + } + + fInvalidBreaks += 1; } /* @@ -299,71 +300,54 @@ void ThaiWordbreakTest::foundInvalidBreak(int32_t br) */ const UChar *ThaiWordbreakTest::readFile(char *fileName, int32_t &charCount) { - FILE *f; - size_t bytesRead; - int32_t fileSize; - - UChar *buffer; - UChar bom; - - f = fopen(fileName, "rb"); - - if( f == NULL ) { - fprintf(stderr,"Couldn't open %s reason: %s \n", fileName, strerror(errno)); - return 0; - } - - fseek(f, 0, SEEK_END); - fileSize = ftell(f) - 2; // - 2 for BOM... - - // FIXME: should check for odd file size... - charCount = fileSize / 2; - - fseek(f, 0, SEEK_SET); - buffer = new UChar[charCount]; - - if(buffer == 0) { - fprintf(stderr,"Couldn't get memory for reading %s reason: %s \n", fileName, strerror(errno)); - fclose(f); - return 0; - } - - // read the BOM... - fread(&bom, 1, 2, f); - - bytesRead = 0; - - while (bytesRead < fileSize && ! feof(f)) { - bytesRead += fread(buffer + bytesRead, 1, fileSize - bytesRead, f); - - if( ferror(f) ) { - fprintf(stderr,"Couldn't read %s reason: %s \n", fileName, strerror(errno)); - fclose(f); - delete[] buffer; - return 0; - } - } - - fclose(f); - - // Swap bytes if the BOM is byte-swapped - if (bom == 0xFFFE) { - char *byteBuffer = (char *) buffer; - int32_t i; - - for (i = 0; i < fileSize; i += 2) { - char temp = byteBuffer[i]; - - byteBuffer[i] = byteBuffer[i + 1]; - byteBuffer[i + 1] = temp; - } - } else if (bom != 0xFEFF) { - fprintf(stderr, "File %s does not start with a Byte Order Mark: 0x%4.4X\n", fileName, bom); - delete[] buffer; - return 0; - } - - return buffer; + FILE *f; + int32_t fileSize; + + UChar *buffer; + char *bufferChars; + + f = fopen(fileName, "rb"); + + if( f == NULL ) { + fprintf(stderr,"Couldn't open %s reason: %s \n", fileName, strerror(errno)); + return 0; + } + + fseek(f, 0, SEEK_END); + fileSize = ftell(f); + + fseek(f, 0, SEEK_SET); + bufferChars = new char[fileSize]; + + if(bufferChars == 0) { + fprintf(stderr,"Couldn't get memory for reading %s reason: %s \n", fileName, strerror(errno)); + fclose(f); + return 0; + } + + fread(bufferChars, sizeof(char), fileSize, f); + if( ferror(f) ) { + fprintf(stderr,"Couldn't read %s reason: %s \n", fileName, strerror(errno)); + fclose(f); + delete[] bufferChars; + return 0; + } + fclose(f); + + UnicodeString myText(bufferChars, fileSize, "UTF-8"); + + charCount = myText.length(); + buffer = new UChar[charCount]; + if(buffer == 0) { + fprintf(stderr,"Couldn't get memory for reading %s reason: %s \n", fileName, strerror(errno)); + return 0; + } + + myText.extract(1, myText.length(), buffer); + charCount--; // skip the BOM + buffer[charCount] = 0; // NULL terminate for easier reading in the debugger + + return buffer; } /*