ICU-10673 AnyTransliterator thread safety fix.

X-SVN-Rev: 35248
This commit is contained in:
Andy Heninger 2014-02-27 18:45:32 +00:00
parent 36bd9445a2
commit 52b3ba1caa
3 changed files with 125 additions and 23 deletions

View file

@ -1,6 +1,6 @@
/*
*****************************************************************
* Copyright (c) 2002-2011, International Business Machines Corporation
* Copyright (c) 2002-2014, International Business Machines Corporation
* and others. All Rights Reserved.
*****************************************************************
* Date Name Description
@ -14,13 +14,15 @@
#include "unicode/uobject.h"
#include "unicode/uscript.h"
#include "nultrans.h"
#include "anytrans.h"
#include "uvector.h"
#include "tridpars.h"
#include "hash.h"
#include "mutex.h"
#include "nultrans.h"
#include "putilimp.h"
#include "tridpars.h"
#include "uinvchar.h"
#include "uvector.h"
//------------------------------------------------------------
// Constants
@ -39,7 +41,7 @@ U_CDECL_BEGIN
*/
static void U_CALLCONV
_deleteTransliterator(void *obj) {
delete (icu::Transliterator*) obj;
delete (icu::Transliterator*) obj;
}
U_CDECL_END
@ -85,7 +87,7 @@ public:
* The end of the run, exclusive, valid after next() returns.
*/
int32_t limit;
/**
* Constructs a run iterator over the given text from start
* (inclusive) to limit (exclusive).
@ -180,7 +182,7 @@ AnyTransliterator::AnyTransliterator(const UnicodeString& id,
UScriptCode theTargetScript,
UErrorCode& ec) :
Transliterator(id, NULL),
targetScript(theTargetScript)
targetScript(theTargetScript)
{
cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
if (U_FAILURE(ec)) {
@ -239,7 +241,7 @@ void AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& p
// Try to instantiate transliterator from it.scriptCode to
// our target or target/variant
Transliterator* t = getTransliterator(it.scriptCode);
if (t == NULL) {
// We have no transliterator. Do nothing, but keep
// pos.start up to date.
@ -251,7 +253,7 @@ void AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& p
// a non-incremental transliteration. Otherwise do an
// incremental one.
UBool incremental = isIncremental && (it.limit >= allLimit);
pos.start = uprv_max(allStart, it.start);
pos.limit = uprv_min(allLimit, it.limit);
int32_t limit = pos.limit;
@ -275,17 +277,21 @@ Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
return NULL;
}
Transliterator* t = (Transliterator*) uhash_iget(cache, (int32_t) source);
Transliterator* t = NULL;
{
Mutex m(NULL);
t = (Transliterator*) uhash_iget(cache, (int32_t) source);
}
if (t == NULL) {
UErrorCode ec = U_ZERO_ERROR;
UnicodeString sourceName(uscript_getName(source), -1, US_INV);
UnicodeString id(sourceName);
id.append(TARGET_SEP).append(target);
t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
if (U_FAILURE(ec) || t == NULL) {
delete t;
// Try to pivot around Latin, our most common script
id = sourceName;
id.append(LATIN_PIVOT, -1).append(target);
@ -297,10 +303,23 @@ Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
}
if (t != NULL) {
uhash_iput(cache, (int32_t) source, t, &ec);
Transliterator *rt = NULL;
{
Mutex m(NULL);
rt = static_cast<Transliterator *> (uhash_iget(cache, (int32_t) source));
if (rt == NULL) {
// Common case, no race to cache this new transliterator.
uhash_iput(cache, (int32_t) source, t, &ec);
} else {
// Race case, some other thread beat us to caching this transliterator.
Transliterator *temp = rt;
rt = t; // Our newly created transliterator that lost the race & now needs deleting.
t = temp; // The transliterator from the cache that we will return.
}
}
delete rt; // will be non-null only in case of races.
}
}
return t;
}
@ -313,7 +332,7 @@ static UScriptCode scriptNameToCode(const UnicodeString& name) {
UErrorCode ec = U_ZERO_ERROR;
int32_t nameLen = name.length();
UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen);
if (isInvariant) {
name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV);
buf[127] = 0; // Make sure that we NULL terminate the string.
@ -352,7 +371,7 @@ void AnyTransliterator::registerIDs() {
if (seen.geti(target) != 0) continue;
ec = U_ZERO_ERROR;
seen.puti(target, 1, ec);
// Get the script code for the target. If not a script, ignore.
UScriptCode targetScript = scriptNameToCode(target);
if (targetScript == USCRIPT_INVALID_CODE) continue;
@ -362,7 +381,7 @@ void AnyTransliterator::registerIDs() {
for (int32_t v=0; v<variantCount; ++v) {
UnicodeString variant;
Transliterator::_getAvailableVariant(v, source, target, variant);
UnicodeString id;
TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), target, variant, id);
ec = U_ZERO_ERROR;

View file

@ -27,7 +27,7 @@
#include "intltest.h"
#include "tsmthred.h"
#include "unicode/ushape.h"
#include "unicode/translit.h"
#if U_PLATFORM_USES_ONLY_WIN32_API
/* Prefer native Windows APIs even if POSIX is implemented (i.e., on Cygwin). */
@ -188,14 +188,20 @@ void MultithreadTest::runIndexedTest( int32_t index, UBool exec,
}
break;
case 5:
case 5:
name = "TestArabicShapingThreads";
if (exec) {
TestArabicShapingThreads();
}
break;
case 6:
name = "TestAnyTranslit";
if (exec) {
TestAnyTranslit();
}
break;
default:
name = "";
break; //needed to end loop
@ -1518,6 +1524,81 @@ cleanupAndReturn:
}
delete testString;
}
};
// Test for ticket #10673, race in cache code in AnyTransliterator.
// It's difficult to make the original unsafe code actually fail, but
// this test will fairly reliably take the code path for races in
// populating the cache.
#if !UCONFIG_NO_TRANSLITERATION
class TxThread: public SimpleThread {
private:
Transliterator *fSharedTranslit;
public:
UBool fSuccess;
TxThread(Transliterator *tx) : fSharedTranslit(tx), fSuccess(FALSE) {};
~TxThread();
void run();
};
TxThread::~TxThread() {};
void TxThread::run() {
UnicodeString greekString("\\u03B4\\u03B9\\u03B1\\u03C6\\u03BF\\u03C1\\u03B5\\u03C4\\u03B9\\u03BA\\u03BF\\u03CD\\u03C2");
greekString = greekString.unescape();
fSharedTranslit->transliterate(greekString);
fSuccess = greekString[0] == 0x64; // 'd'. The whole transliterated string is "diaphoretikous" (accented u).
}
#endif
void MultithreadTest::TestAnyTranslit() {
#if !UCONFIG_NO_TRANSLITERATION
UErrorCode status = U_ZERO_ERROR;
LocalPointer<Transliterator> tx(Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, status));
if (U_FAILURE(status)) {
errln("File %s, Line %d: Error, status = %s", __FILE__, __LINE__, u_errorName(status));
return;
}
TxThread * threads[4];
int32_t i;
for (i=0; i<4; i++) {
threads[i] = new TxThread(tx.getAlias());
}
for (i=0; i<4; i++) {
threads[i]->start();
}
int32_t patience = 100;
UBool success;
UBool someThreadRunning;
do {
someThreadRunning = FALSE;
success = TRUE;
for (i=0; i<4; i++) {
if (threads[i]->isRunning()) {
someThreadRunning = TRUE;
SimpleThread::sleep(10);
break;
} else {
if (threads[i]->fSuccess == FALSE) {
success = FALSE;
}
}
}
} while (someThreadRunning && --patience > 0);
if (patience <= 0) {
errln("File %s, Line %d: Error, one or more threads did not complete.", __FILE__, __LINE__);
}
if (success == FALSE) {
errln("File %s, Line %d: Error, transliteration result incorrect.", __FILE__, __LINE__);
}
for (i=0; i<4; i++) {
delete threads[i];
}
#endif // !UCONFIG_NO_TRANSLITERATION
}
#endif // ICU_USE_THREADS

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2012, International Business Machines Corporation and
* Copyright (c) 1997-2014, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -44,8 +44,10 @@ public:
**/
void TestThreadedIntl(void);
#endif
void TestCollators(void);
void TestString();
void TestCollators(void);
void TestString();
void TestAnyTranslit();
};
#endif