mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-1289 a simple utility class for finding script runs.
X-SVN-Rev: 6288
This commit is contained in:
parent
4bdda5fd78
commit
97a2b2a5ad
4 changed files with 296 additions and 0 deletions
87
icu4c/source/extra/scrptrun/Makefile.in
Normal file
87
icu4c/source/extra/scrptrun/Makefile.in
Normal file
|
@ -0,0 +1,87 @@
|
|||
## Makefile.in for ICU - extra/scrptrun
## Copyright (c) 2001, International Business Machines Corporation and
## others. All Rights Reserved.
##
## Builds the 'srtest' program from scrptrun.o and srtest.o.
## NOTE(review): RMV, LINK.cc, LIBICUUC, LD_RPATH, LD_RPATH_PRE, libdir and
## LDLIBRARYPATH_ENVVAR are not defined here; presumably they come from
## icudefs.mk or the platform make fragment - confirm.

## Source directory information
srcdir = @srcdir@
top_srcdir = @top_srcdir@

top_builddir = ../..

include $(top_builddir)/icudefs.mk

## Platform-specific setup
include @platform_make_fragment@

## Build directory information
subdir = extra/scrptrun

## Extra files to remove for 'make clean'
CLEANFILES = *~ $(DEPS)

## Target information
TARGET = srtest

DEFS = @DEFS@
CPPFLAGS = @CPPFLAGS@ -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)
CFLAGS = @CFLAGS@
CXXFLAGS = @CXXFLAGS@
ENABLE_RPATH = @ENABLE_RPATH@
## The rpath linker flags are only added when configure enabled them.
ifeq ($(ENABLE_RPATH),YES)
RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir)
endif
LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS)
## Environment prefix used by the 'invoke' target: puts the build-tree library
## directories in front of the existing value of the loader path variable.
INVOKE = $(LDLIBRARYPATH_ENVVAR)=$(top_builddir)/common:$(top_builddir)/i18n:$(top_builddir)/tools/toolutil:$$$(LDLIBRARYPATH_ENVVAR)
LIBS = $(LIBICUUC) @LIBS@ @LIB_M@

OBJECTS = scrptrun.o srtest.o

DEPS = $(OBJECTS:.o=.d)

## List of phony targets
## ('invoke' is listed so that a file named "invoke" cannot suppress it)
.PHONY : all all-local install install-local clean clean-local \
	distclean distclean-local dist dist-local check check-local invoke

## Clear suffix list
.SUFFIXES :

## List of standard targets
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local

all-local: $(TARGET)

## Nothing is installed or packaged from this directory.
install-local:

dist-local:

clean-local:
	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
	$(RMV) $(OBJECTS) $(TARGET)

distclean-local: clean-local
	$(RMV) Makefile

## 'check' only builds the program; it does not run it.
check-local: all-local

## Regenerate this Makefile through config.status when the template changes.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
	cd $(top_builddir) \
	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status

$(TARGET) : $(OBJECTS)
	$(LINK.cc) -o $@ $^ $(LIBS)

## Run $(INVOCATION) with the build-tree libraries on the loader path.
## ICU_DATA defaults to $(top_builddir)/data/ unless already set.
invoke:
	ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)

## Pull in the dependency files, except when every requested goal
## ends in "clean".
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif
|
62
icu4c/source/extra/scrptrun/scrptrun.cpp
Normal file
62
icu4c/source/extra/scrptrun/scrptrun.cpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* %W% %E%
|
||||
*
|
||||
* (C) Copyright IBM Corp. 2001 - All Rights Reserved
|
||||
*
|
||||
*/
|
||||
|
||||
#include "layout/LETypes.h"
|
||||
#include "layout/LEScripts.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
#include "scrptrun.h"
|
||||
|
||||
/*
 * Decide whether two script codes may belong to the same run.
 *
 * Returns true when either code is at or below qaaiScriptCode, or when
 * the two codes are equal; returns false otherwise.
 *
 * NOTE(review): the codes at or below qaaiScriptCode are presumably the
 * "common" and "inherited" pseudo-scripts from layout/LEScripts.h - confirm.
 */
le_bool ScriptRun::sameScript(le_int32 scriptOne, le_int32 scriptTwo)
{
    // A low (non-specific) code on either side never forces a break.
    if (scriptOne <= qaaiScriptCode) {
        return true;
    }

    if (scriptTwo <= qaaiScriptCode) {
        return true;
    }

    // Both codes are specific: they must match exactly.
    return scriptOne == scriptTwo;
}
|
||||
|
||||
/*
 * Advance to the next script run.
 *
 * Returns false, leaving the state untouched, when scriptEnd has already
 * reached charLimit. Otherwise scriptStart is set to the previous
 * scriptEnd, scriptEnd is moved to the index of the first code unit that
 * does not belong to the run (or to charLimit), scriptCode is set to the
 * run's script, and true is returned.
 *
 * scriptCode starts as zyyyScriptCode and is replaced by the first code
 * greater than qaaiScriptCode that is seen in the run.
 *
 * The UErrorCode filled in by uscript_getScript() is not examined.
 */
le_bool ScriptRun::next()
{
    // Nothing left to scan.
    if (scriptEnd >= charLimit) {
        return false;
    }

    UErrorCode status = U_ZERO_ERROR;

    scriptStart = scriptEnd;
    scriptCode  = zyyyScriptCode;

    while (scriptEnd < charLimit) {
        LEUnicode16 lead      = charArray[scriptEnd];
        LEUnicode32 codePoint = lead;

        // Combine a high surrogate with a following low surrogate, provided
        // there is a following code unit inside the range.
        if (scriptEnd < charLimit - 1 && lead >= 0xD800 && lead <= 0xDBFF) {
            LEUnicode16 trail = charArray[scriptEnd + 1];

            if (trail >= 0xDC00 && trail <= 0xDFFF) {
                codePoint = (lead - 0xD800) * 0x0400 + trail - 0xDC00 + 0x10000;
                scriptEnd += 1;     // now indexes the low surrogate
            }
        }

        le_int32 charScript = uscript_getScript(codePoint, &status);

        if (!sameScript(scriptCode, charScript)) {
            // The run ends here. If the breaking character was a surrogate
            // pair, step back so the run ends before its high surrogate.
            if (codePoint >= 0x10000) {
                scriptEnd -= 1;
            }

            break;
        }

        // The first specific script seen becomes the script of the run.
        if (scriptCode <= qaaiScriptCode && charScript > qaaiScriptCode) {
            scriptCode = charScript;
        }

        scriptEnd += 1;
    }

    return true;
}
|
||||
|
109
icu4c/source/extra/scrptrun/scrptrun.h
Normal file
109
icu4c/source/extra/scrptrun/scrptrun.h
Normal file
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* %W% %E%
|
||||
*
|
||||
* (C) Copyright IBM Corp. 2001 - All Rights Reserved
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __SCRPTRUN_H
|
||||
#define __SCRPTRUN_H
|
||||
|
||||
#include "layout/LETypes.h"
|
||||
|
||||
struct ScriptRecord
|
||||
{
|
||||
LEUnicode32 startChar;
|
||||
LEUnicode32 endChar;
|
||||
le_int32 scriptCode;
|
||||
};
|
||||
|
||||
class ScriptRun
|
||||
{
|
||||
public:
|
||||
ScriptRun();
|
||||
|
||||
ScriptRun(const LEUnicode chars[], le_int32 length);
|
||||
|
||||
ScriptRun(const LEUnicode chars[], le_int32 start, le_int32 length);
|
||||
|
||||
void reset();
|
||||
|
||||
void reset(le_int32 start, le_int32 count);
|
||||
|
||||
void reset(const LEUnicode chars[], le_int32 start, le_int32 length);
|
||||
|
||||
le_int32 getScriptStart();
|
||||
|
||||
le_int32 getScriptEnd();
|
||||
|
||||
le_int32 getScriptCode();
|
||||
|
||||
le_bool next();
|
||||
|
||||
private:
|
||||
|
||||
static le_bool sameScript(le_int32 scriptOne, le_int32 scriptTwo);
|
||||
|
||||
le_int32 charStart;
|
||||
le_int32 charLimit;
|
||||
const LEUnicode *charArray;
|
||||
|
||||
le_int32 scriptStart;
|
||||
le_int32 scriptEnd;
|
||||
le_int32 scriptCode;
|
||||
};
|
||||
|
||||
inline ScriptRun::ScriptRun()
|
||||
{
|
||||
reset(NULL, 0, 0);
|
||||
}
|
||||
|
||||
inline ScriptRun::ScriptRun(const LEUnicode chars[], le_int32 length)
|
||||
{
|
||||
reset(chars, 0, length);
|
||||
}
|
||||
|
||||
inline ScriptRun::ScriptRun(const LEUnicode chars[], le_int32 start, le_int32 length)
|
||||
{
|
||||
reset(chars, start, length);
|
||||
}
|
||||
|
||||
inline le_int32 ScriptRun::getScriptStart()
|
||||
{
|
||||
return scriptStart;
|
||||
}
|
||||
|
||||
inline le_int32 ScriptRun::getScriptEnd()
|
||||
{
|
||||
return scriptEnd;
|
||||
}
|
||||
|
||||
inline le_int32 ScriptRun::getScriptCode()
|
||||
{
|
||||
return scriptCode;
|
||||
}
|
||||
|
||||
inline void ScriptRun::reset()
|
||||
{
|
||||
scriptStart = charStart;
|
||||
scriptEnd = charStart;
|
||||
scriptCode = -1;
|
||||
}
|
||||
|
||||
inline void ScriptRun::reset(le_int32 start, le_int32 length)
|
||||
{
|
||||
charStart = start;
|
||||
charLimit = start + length;
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
inline void ScriptRun::reset(const LEUnicode chars[], le_int32 start, le_int32 length)
|
||||
{
|
||||
charArray = chars;
|
||||
|
||||
reset(start, length);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
38
icu4c/source/extra/scrptrun/srtest.cpp
Normal file
38
icu4c/source/extra/scrptrun/srtest.cpp
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* %W% %E%
|
||||
*
|
||||
* (C) Copyright IBM Corp. 2001 - All Rights Reserved
|
||||
*
|
||||
*/
|
||||
|
||||
#include "layout/LETypes.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
#include "scrptrun.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
LEUnicode testChars[] = {
|
||||
0x0020, 0x0946, 0x0939, 0x093F, 0x0928, 0x094D, 0x0926, 0x0940, 0x0020,
|
||||
0x0627, 0x0644, 0x0639, 0x0631, 0x0628, 0x064A, 0x0629, 0x0020,
|
||||
0x0420, 0x0443, 0x0441, 0x0441, 0x043A, 0x0438, 0x0439, 0x0020,
|
||||
'E', 'n', 'g', 'l', 'i', 's', 'h', 0x0020,
|
||||
0x6F22, 0x5B75, 0x3068, 0x3072, 0x3089, 0x304C, 0x306A, 0x3068,
|
||||
0x30AB, 0x30BF, 0x30AB, 0x30CA,
|
||||
0xD801, 0xDC00, 0xD801, 0xDC01, 0xD801, 0xDC02, 0xD801, 0xDC03
|
||||
};
|
||||
|
||||
le_int32 testLength = sizeof testChars / sizeof testChars[0];
|
||||
|
||||
void main()
|
||||
{
|
||||
ScriptRun scriptRun(testChars, 0, testLength);
|
||||
|
||||
while (scriptRun.next()) {
|
||||
le_int32 start = scriptRun.getScriptStart();
|
||||
le_int32 end = scriptRun.getScriptEnd();
|
||||
le_int32 code = scriptRun.getScriptCode();
|
||||
|
||||
printf("Script '%s' from %d to %d.\n", uscript_getName((UScriptCode) code), start, end);
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue