ICU-1289 a simple utility class for finding script runs.

X-SVN-Rev: 6288
This commit is contained in:
Eric Mader 2001-10-17 21:17:16 +00:00
parent 4bdda5fd78
commit 97a2b2a5ad
4 changed files with 296 additions and 0 deletions

View file

@ -0,0 +1,87 @@
## Makefile.in for ICU - extra/scrptrun
## Copyright (c) 2001, International Business Machines Corporation and
## others. All Rights Reserved.
##
## Template processed by configure (the @...@ tokens are substituted) to
## produce the Makefile that builds the 'srtest' program from scrptrun.o
## and srtest.o.
## Source directory information
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = ../..
## Shared ICU make definitions from the top of the build tree
include $(top_builddir)/icudefs.mk
## Platform-specific setup
include @platform_make_fragment@
## Build directory information
subdir = extra/scrptrun
## Extra files to remove for 'make clean'
## (DEPS is defined further down; '=' is recursively expanded, so it is
## resolved when CLEANFILES is used, not here)
CLEANFILES = *~ $(DEPS)
## Target information
TARGET = srtest
DEFS = @DEFS@
## Header search path: build-tree and source-tree 'common' directories plus
## the top of the source tree
CPPFLAGS = @CPPFLAGS@ -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)
CFLAGS = @CFLAGS@
CXXFLAGS = @CXXFLAGS@
## RPATHLDFLAGS is only set when configure substituted ENABLE_RPATH as YES
ENABLE_RPATH = @ENABLE_RPATH@
ifeq ($(ENABLE_RPATH),YES)
RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir)
endif
LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS)
## Environment prefix used by the 'invoke' target: puts the build-tree
## common, i18n and tools/toolutil directories in front of the existing
## value of the library-path variable ($$ yields a literal '$' for the shell)
INVOKE = $(LDLIBRARYPATH_ENVVAR)=$(top_builddir)/common:$(top_builddir)/i18n:$(top_builddir)/tools/toolutil:$$$(LDLIBRARYPATH_ENVVAR)
LIBS = $(LIBICUUC) @LIBS@ @LIB_M@
OBJECTS = scrptrun.o srtest.o
## One dependency (.d) file per object file
DEPS = $(OBJECTS:.o=.d)
## List of phony targets
.PHONY : all all-local install install-local clean clean-local \
distclean distclean-local dist dist-local check check-local
## Clear suffix list
.SUFFIXES :
## List of standard targets
## (each standard target forwards to its -local counterpart)
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local
all-local: $(TARGET)
## install-local and dist-local are intentionally empty: nothing from this
## directory is installed or packaged by these rules
install-local:
dist-local:
## Remove CLEANFILES (if the list is non-empty), then the objects and program
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
$(RMV) $(OBJECTS) $(TARGET)
## distclean additionally removes the generated Makefile
distclean-local: clean-local
$(RMV) Makefile
## check-local only ensures the program is built; it does not run it
check-local: all-local
## Regenerate this Makefile through config.status when Makefile.in or
## config.status changes
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
## Link the test program from all object files
$(TARGET) : $(OBJECTS)
$(LINK.cc) -o $@ $^ $(LIBS)
## Run $(INVOCATION) with ICU_DATA defaulting to the build-tree data
## directory (when not already set), TZ fixed to PST8PDT, and the library
## path from INVOKE
invoke:
ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
## Pull in the dependency files when no goal was given, or when the goals,
## after dropping every word ending in "clean", are not empty
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif

View file

@ -0,0 +1,62 @@
/*
* %W% %E%
*
* (C) Copyright IBM Corp. 2001 - All Rights Reserved
*
*/
#include "layout/LETypes.h"
#include "layout/LEScripts.h"
#include "unicode/uscript.h"
#include "scrptrun.h"
/*
 * Decide whether two script codes may belong to the same run.
 *
 * Any code at or below qaaiScriptCode is treated as a wildcard that matches
 * every script (presumably the "common" and "inherited" codes -- confirm
 * against LEScripts.h); otherwise the two codes must be equal.
 */
le_bool ScriptRun::sameScript(le_int32 scriptOne, le_int32 scriptTwo)
{
    // A wildcard on either side always matches.
    if (scriptOne <= qaaiScriptCode) {
        return true;
    }

    if (scriptTwo <= qaaiScriptCode) {
        return true;
    }

    // Two specific scripts: they must agree exactly.
    return scriptOne == scriptTwo;
}
/*
 * Advance to the next script run.
 *
 * Starting where the previous run ended, consume UTF-16 code units (combining
 * a valid high/low surrogate pair into one code point) for as long as each
 * character's script is compatible with the run's script according to
 * sameScript(). On return scriptStart is the index of the run's first code
 * unit, scriptEnd is the index just past its last code unit, and scriptCode
 * is the first script code greater than qaaiScriptCode seen in the run (or
 * zyyyScriptCode if none was seen).
 *
 * Returns false, leaving the state untouched, when the previous run already
 * reached charLimit; returns true otherwise.
 */
le_bool ScriptRun::next()
{
    // Passed to uscript_getScript(); its value is not inspected afterwards.
    UErrorCode status = U_ZERO_ERROR;

    if (scriptEnd >= charLimit) {
        return false;
    }

    scriptCode  = zyyyScriptCode;
    scriptStart = scriptEnd;

    while (scriptEnd < charLimit) {
        LEUnicode16 lead      = charArray[scriptEnd];
        LEUnicode32 codePoint = lead;
        le_int32    unitCount = 1;   // code units occupied by this character

        // Only look ahead for a low surrogate when one more unit is in range.
        if (lead >= 0xD800 && lead <= 0xDBFF && scriptEnd < charLimit - 1) {
            LEUnicode16 trail = charArray[scriptEnd + 1];

            if (trail >= 0xDC00 && trail <= 0xDFFF) {
                codePoint = (lead - 0xD800) * 0x0400 + trail - 0xDC00 + 0x10000;
                unitCount = 2;
            }
        }

        le_int32 charScript = uscript_getScript(codePoint, &status);

        if (!sameScript(scriptCode, charScript)) {
            // The run ends before this character; scriptEnd still points at
            // its first code unit (the high surrogate for a pair).
            break;
        }

        // Adopt the first specific script encountered as the run's script.
        if (scriptCode <= qaaiScriptCode && charScript > qaaiScriptCode) {
            scriptCode = charScript;
        }

        scriptEnd += unitCount;
    }

    return true;
}

View file

@ -0,0 +1,109 @@
/*
* %W% %E%
*
* (C) Copyright IBM Corp. 2001 - All Rights Reserved
*
*/
#ifndef __SCRPTRUN_H
#define __SCRPTRUN_H
#include "layout/LETypes.h"
/*
 * A range of characters paired with a script code.
 *
 * NOTE(review): nothing in the visible sources reads or writes this struct;
 * presumably startChar..endChar is a code point range that maps to
 * scriptCode -- confirm whether endChar is inclusive before relying on it.
 */
struct ScriptRecord
{
LEUnicode32 startChar;  // first character of the range
LEUnicode32 endChar;    // last character of the range (inclusiveness unverified)
le_int32 scriptCode;    // script code associated with the range
};
/*
 * Iterator that splits a range of UTF-16 text into runs of characters
 * sharing a script.
 *
 * Typical use: construct (or reset()) with a character array, then call
 * next() repeatedly; after each call that returns true, getScriptStart(),
 * getScriptEnd() and getScriptCode() describe the run just found.
 *
 * The object stores the pointer it is given and does not copy the text, so
 * the array must remain valid for as long as next() may be called.
 */
class ScriptRun
{
public:
    /* Create an iterator over an empty range (no text). */
    ScriptRun();

    /* Iterate over chars[0 .. length). */
    ScriptRun(const LEUnicode chars[], le_int32 length);

    /* Iterate over chars[start .. start + length). */
    ScriptRun(const LEUnicode chars[], le_int32 start, le_int32 length);

    /* Restart iteration at the beginning of the current range. */
    void reset();

    /* Select a new range [start, start + length) within the current text and restart. */
    void reset(le_int32 start, le_int32 length);

    /* Select new text and a new range [start, start + length), then restart. */
    void reset(const LEUnicode chars[], le_int32 start, le_int32 length);

    /* Index of the first code unit of the current run. */
    le_int32 getScriptStart() const;

    /* Index just past the last code unit of the current run. */
    le_int32 getScriptEnd() const;

    /* Script code of the current run; -1 after a reset and before next() has returned true. */
    le_int32 getScriptCode() const;

    /* Advance to the next run; returns false once the range is exhausted. */
    le_bool next();

private:
    /* True when the two script codes may be part of the same run. */
    static le_bool sameScript(le_int32 scriptOne, le_int32 scriptTwo);

    le_int32 charStart;         // index of the first code unit of the range
    le_int32 charLimit;         // index just past the last code unit of the range

    const LEUnicode *charArray; // text being scanned (not owned)

    le_int32 scriptStart;       // start index of the current run
    le_int32 scriptEnd;         // limit index of the current run
    le_int32 scriptCode;        // script code of the current run
};

inline ScriptRun::ScriptRun()
{
    reset(NULL, 0, 0);
}

inline ScriptRun::ScriptRun(const LEUnicode chars[], le_int32 length)
{
    reset(chars, 0, length);
}

inline ScriptRun::ScriptRun(const LEUnicode chars[], le_int32 start, le_int32 length)
{
    reset(chars, start, length);
}

// The accessors are const so they can also be called through a const
// ScriptRun; they only read the state left behind by next() or reset().
inline le_int32 ScriptRun::getScriptStart() const
{
    return scriptStart;
}

inline le_int32 ScriptRun::getScriptEnd() const
{
    return scriptEnd;
}

inline le_int32 ScriptRun::getScriptCode() const
{
    return scriptCode;
}

inline void ScriptRun::reset()
{
    // An empty run positioned at the start of the range; -1 marks "no run yet".
    scriptStart = charStart;
    scriptEnd = charStart;
    scriptCode = -1;
}

inline void ScriptRun::reset(le_int32 start, le_int32 length)
{
    charStart = start;
    charLimit = start + length;

    reset();
}

inline void ScriptRun::reset(const LEUnicode chars[], le_int32 start, le_int32 length)
{
    charArray = chars;

    reset(start, length);
}
#endif

View file

@ -0,0 +1,38 @@
/*
* %W% %E%
*
* (C) Copyright IBM Corp. 2001 - All Rights Reserved
*
*/
#include "layout/LETypes.h"
#include "unicode/uscript.h"
#include "scrptrun.h"
#include <stdio.h>
/*
 * Sample text for the test driver: several groups of UTF-16 code units,
 * most of them followed by a space (U+0020). The per-row notes name the
 * Unicode block the values fall in.
 */
LEUnicode testChars[] = {
    // leading space, then Devanagari (U+09xx)
    0x0020, 0x0946, 0x0939, 0x093F, 0x0928, 0x094D, 0x0926, 0x0940, 0x0020,
    // Arabic (U+06xx)
    0x0627, 0x0644, 0x0639, 0x0631, 0x0628, 0x064A, 0x0629, 0x0020,
    // Cyrillic (U+04xx)
    0x0420, 0x0443, 0x0441, 0x0441, 0x043A, 0x0438, 0x0439, 0x0020,
    // Basic Latin
    'E', 'n', 'g', 'l', 'i', 's', 'h', 0x0020,
    // CJK ideographs mixed with Hiragana (U+304x-U+309x)
    0x6F22, 0x5B75, 0x3068, 0x3072, 0x3089, 0x304C, 0x306A, 0x3068,
    // Katakana (U+30Ax-U+30Fx)
    0x30AB, 0x30BF, 0x30AB, 0x30CA,
    // four surrogate pairs encoding U+10400 .. U+10403
    0xD801, 0xDC00, 0xD801, 0xDC01, 0xD801, 0xDC02, 0xD801, 0xDC03
};

// Number of code units in testChars.
le_int32 testLength = sizeof(testChars) / sizeof(testChars[0]);
/*
 * Test driver: walks testChars with a ScriptRun and prints, for every run
 * found, the script name and the run's start and limit indices.
 *
 * Returns 0. (main must return int; "void main" is not valid standard C++.)
 */
int main()
{
    ScriptRun scriptRun(testChars, 0, testLength);

    while (scriptRun.next()) {
        le_int32 start = scriptRun.getScriptStart();
        le_int32 end = scriptRun.getScriptEnd();
        le_int32 code = scriptRun.getScriptCode();

        // Guard against a NULL name so that "%s" never receives a null pointer.
        const char *name = uscript_getName((UScriptCode) code);

        if (name == NULL) {
            name = "(unknown)";
        }

        // Cast to int so the arguments match "%d" whatever le_int32 is defined as.
        printf("Script '%s' from %d to %d.\n", name, (int) start, (int) end);
    }

    return 0;
}