mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-3501 Better language detection and parsing
X-SVN-Rev: 15973
This commit is contained in:
parent
eac013caa3
commit
caa45d1968
1 changed files with 124 additions and 107 deletions
|
@ -18,7 +18,7 @@ import org.apache.xerces.dom.ElementImpl;
|
|||
import org.apache.xerces.dom.TextImpl;
|
||||
import org.apache.xerces.parsers.DOMParser;
|
||||
import org.w3c.dom.*;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.*;
|
||||
|
||||
/**
|
||||
* This imports XLIFF files into RBManager.
|
||||
|
@ -59,7 +59,7 @@ public class RBxliffImporter extends RBImporter {
|
|||
DOMParser parser = new DOMParser();
|
||||
parser.parse(is);
|
||||
xlf_xml = (DocumentImpl)parser.getDocument();
|
||||
} catch (Exception e) {
|
||||
} catch (SAXException e) {
|
||||
RBManagerGUI.debugMsg(e.getMessage());
|
||||
e.printStackTrace(System.err);
|
||||
}
|
||||
|
@ -69,7 +69,9 @@ public class RBxliffImporter extends RBImporter {
|
|||
}
|
||||
|
||||
private void importDoc() {
|
||||
if (xlf_xml == null) return;
|
||||
if (xlf_xml == null)
|
||||
return;
|
||||
String language = null;
|
||||
ElementImpl root = (ElementImpl)xlf_xml.getDocumentElement();
|
||||
Node fileNode = root.getFirstChild();
|
||||
Node node = null;
|
||||
|
@ -90,33 +92,54 @@ public class RBxliffImporter extends RBImporter {
|
|||
|
||||
String sourceLocale = ((ElementImpl)fileNode).getAttribute("source-language");
|
||||
String targetLocale = ((ElementImpl)fileNode).getAttribute("target-language");
|
||||
Vector localeNames = new Vector();
|
||||
if (!sourceLocale.equals("")) {
|
||||
localeNames.add(sourceLocale);
|
||||
language = sourceLocale;
|
||||
}
|
||||
if (!targetLocale.equals("")) {
|
||||
localeNames.add(targetLocale);
|
||||
// The target language is the real data. The source is only for reference.
|
||||
// We could do verification that all the data is translated the same though.
|
||||
language = targetLocale;
|
||||
}
|
||||
resolveEncodings(localeNames);
|
||||
|
||||
// Now do the actual import resource by resource
|
||||
NodeList tu_list = body.getElementsByTagName("group");
|
||||
int body_nodes_length = body.getChildNodes().getLength();
|
||||
NodeList body_list = body.getChildNodes();
|
||||
int groupCount = 0;
|
||||
int groupCount = 0, elementCount = 0;
|
||||
Node last_group_node = null;
|
||||
for (int i=0; i < body_nodes_length; i++) {
|
||||
Node body_elem = body_list.item(i);
|
||||
if (body_elem.getNodeType() == Node.ELEMENT_NODE && body_elem.getNodeName().equalsIgnoreCase("group")) {
|
||||
groupCount++;
|
||||
last_group_node = body_elem;
|
||||
if (body_elem.getNodeType() == Node.ELEMENT_NODE) {
|
||||
if (body_elem.getNodeName().equalsIgnoreCase("group")) {
|
||||
groupCount++;
|
||||
last_group_node = body_elem;
|
||||
}
|
||||
elementCount++;
|
||||
}
|
||||
}
|
||||
if (groupCount == 1) {
|
||||
if (elementCount == 1 && groupCount == 1) {
|
||||
// ICU style group where the top group is just the locale.
|
||||
ElementImpl localeNode = (ElementImpl)last_group_node;
|
||||
tu_list = last_group_node.getChildNodes();
|
||||
String rootGroupName = localeNode.getAttribute("id");
|
||||
if (rootGroupName != null && rootGroupName.equals("root")) {
|
||||
rootGroupName = "";
|
||||
}
|
||||
// It's done this way because ICU handles rfc3066bis (the successor of rfc3066)
|
||||
// XLIFF requires rfc3066, which doesn't handle scripts.
|
||||
language = rootGroupName;
|
||||
}
|
||||
|
||||
// Add the locale if needed, and normalize it to the correct format.
|
||||
Vector localeNames = new Vector();
|
||||
char array[] = language.toCharArray();
|
||||
for (int k=0; k < array.length; k++) {
|
||||
if (array[k] == '-')
|
||||
array[k] = '_';
|
||||
}
|
||||
language = String.valueOf(array);
|
||||
localeNames.add(language);
|
||||
resolveEncodings(localeNames);
|
||||
|
||||
for (int i=0; i < tu_list.getLength(); i++) {
|
||||
if (!(tu_list.item(i) instanceof ElementImpl)) {
|
||||
|
@ -135,109 +158,103 @@ public class RBxliffImporter extends RBImporter {
|
|||
//NodeList group_list = tu_elem.getElementsByTagName("group");
|
||||
}
|
||||
|
||||
if (group == null || group.length() < 1)
|
||||
if (group == null || group.length() < 1) {
|
||||
group = getDefaultGroup();
|
||||
parseTranslationUnit(language, group, tu_elem);
|
||||
}
|
||||
|
||||
NodeList trans_unit_list = tu_elem.getElementsByTagName("trans-unit");
|
||||
// For each trans-unit element
|
||||
for (int j=0; j < trans_unit_list.getLength(); j++) {
|
||||
ElementImpl trans_unit_elem = (ElementImpl)trans_unit_list.item(j);
|
||||
ElementImpl source_elem = (ElementImpl)trans_unit_elem.getElementsByTagName("source").item(0);
|
||||
ElementImpl target_elem = (ElementImpl)trans_unit_elem.getElementsByTagName("target").item(0);
|
||||
ElementImpl note_elem = (ElementImpl)trans_unit_elem.getElementsByTagName("note").item(0);
|
||||
if (target_elem == null) {
|
||||
target_elem = source_elem;
|
||||
}
|
||||
String language = target_elem.getAttribute("xml:lang");
|
||||
// Get the current encoding
|
||||
if (language == null)
|
||||
continue;
|
||||
char array[] = language.toCharArray();
|
||||
for (int k=0; k < array.length; k++) {
|
||||
if (array[k] == '-')
|
||||
array[k] = '_';
|
||||
}
|
||||
language = String.valueOf(array);
|
||||
// Get the translation value
|
||||
if (target_elem.getLength() < 1)
|
||||
continue;
|
||||
target_elem.normalize();
|
||||
NodeList text_list = target_elem.getChildNodes();
|
||||
if (text_list.getLength() < 1)
|
||||
continue;
|
||||
TextImpl text_elem = (TextImpl)text_list.item(0);
|
||||
String value = text_elem.getNodeValue();
|
||||
if (value == null || value.length() < 1)
|
||||
continue;
|
||||
/*NamedNodeMap attribMap = trans_unit_elem.getAttributes();
|
||||
for (int k = 0; k < attribMap.getLength(); k++) {
|
||||
String attribMapName = attribMap.item(k).getNodeName();
|
||||
System.out.println(attribMapName);
|
||||
}*/
|
||||
name = trans_unit_elem.getAttribute("id");
|
||||
if (name == null || name.length() < 1)
|
||||
continue;
|
||||
// Create the bundle item
|
||||
BundleItem item = new BundleItem(null, name, value);
|
||||
// Get creation, modification values
|
||||
/*item.setCreatedDate(tuv_elem.getAttribute("creationdate"));
|
||||
item.setModifiedDate(tuv_elem.getAttribute("changedate"));
|
||||
if (tuv_elem.getAttribute("changeid") != null) item.setModifier(tuv_elem.getAttribute("changeid"));
|
||||
if (tuv_elem.getAttribute("creationid") != null) item.setCreator(tuv_elem.getAttribute("creationid"));*/
|
||||
// Get properties specified
|
||||
/* NodeList prop_list = tuv_elem.getElementsByTagName("prop");
|
||||
Hashtable lookups = null;
|
||||
for (int k=0; k < prop_list.getLength(); k++) {
|
||||
ElementImpl prop_elem = (ElementImpl)prop_list.item(k);
|
||||
String type = prop_elem.getAttribute("type");
|
||||
if (type != null && type.equals("x-Comment")) {
|
||||
// Get the comment
|
||||
prop_elem.normalize();
|
||||
text_list = prop_elem.getChildNodes();
|
||||
if (text_list.getLength() < 1) continue;
|
||||
text_elem = (TextImpl)text_list.item(0);
|
||||
String comment = text_elem.getNodeValue();
|
||||
if (comment != null && comment.length() > 0) item.setComment(comment);
|
||||
} else if (type != null && type.equals("x-Translated")) {
|
||||
// Get the translated flag value
|
||||
prop_elem.normalize();
|
||||
text_list = prop_elem.getChildNodes();
|
||||
if (text_list.getLength() < 1) continue;
|
||||
text_elem = (TextImpl)text_list.item(0);
|
||||
if (text_elem.getNodeValue() != null) {
|
||||
if (text_elem.getNodeValue().equalsIgnoreCase("true")) item.setTranslated(true);
|
||||
else if (text_elem.getNodeValue().equalsIgnoreCase("false")) item.setTranslated(false);
|
||||
else item.setTranslated(getDefaultTranslated());
|
||||
} else item.setTranslated(getDefaultTranslated());
|
||||
} else if (type != null && type.equals("x-Lookup")) {
|
||||
// Get a lookup value
|
||||
prop_elem.normalize();
|
||||
text_list = prop_elem.getChildNodes();
|
||||
if (text_list.getLength() < 1) continue;
|
||||
text_elem = (TextImpl)text_list.item(0);
|
||||
if (text_elem.getNodeValue() != null) {
|
||||
String text = text_elem.getNodeValue();
|
||||
if (text.indexOf("=") > 0) {
|
||||
try {
|
||||
if (lookups == null)
|
||||
lookups = new Hashtable();
|
||||
String lkey = text.substring(0,text.indexOf("="));
|
||||
String lvalue = text.substring(text.indexOf("=")+1,text.length());
|
||||
lookups.put(lkey, lvalue);
|
||||
} catch (Exception ex) {
|
||||
//String out of bounds - Ignore and go on
|
||||
}
|
||||
}
|
||||
} else item.setTranslated(getDefaultTranslated());
|
||||
}
|
||||
}*/
|
||||
// if (lookups != null)
|
||||
// item.setLookups(lookups);
|
||||
importResource(item, language, group);
|
||||
parseTranslationUnit(language, group, (ElementImpl)trans_unit_list.item(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void parseTranslationUnit(String language, String group, ElementImpl trans_unit_elem) {
|
||||
// Get the translation value
|
||||
ElementImpl target_elem = (ElementImpl)trans_unit_elem.getElementsByTagName("target").item(0);
|
||||
if (target_elem == null) {
|
||||
// This is a template, or a skeleton
|
||||
target_elem = (ElementImpl)trans_unit_elem.getElementsByTagName("source").item(0);
|
||||
}
|
||||
ElementImpl note_elem = (ElementImpl)trans_unit_elem.getElementsByTagName("note").item(0);
|
||||
if (target_elem.getLength() < 1)
|
||||
return;
|
||||
target_elem.normalize();
|
||||
NodeList text_list = target_elem.getChildNodes();
|
||||
if (text_list.getLength() < 1)
|
||||
return;
|
||||
TextImpl text_elem = (TextImpl)text_list.item(0);
|
||||
String value = text_elem.getNodeValue();
|
||||
if (value == null || value.length() < 1)
|
||||
return;
|
||||
/*NamedNodeMap attribMap = trans_unit_elem.getAttributes();
|
||||
for (int k = 0; k < attribMap.getLength(); k++) {
|
||||
String attribMapName = attribMap.item(k).getNodeName();
|
||||
System.out.println(attribMapName);
|
||||
}*/
|
||||
String name = trans_unit_elem.getAttribute("id");
|
||||
if (name == null || name.length() < 1)
|
||||
return;
|
||||
// Create the bundle item
|
||||
BundleItem item = new BundleItem(null, name, value);
|
||||
// Get creation, modification values
|
||||
/*item.setCreatedDate(tuv_elem.getAttribute("creationdate"));
|
||||
item.setModifiedDate(tuv_elem.getAttribute("changedate"));
|
||||
if (tuv_elem.getAttribute("changeid") != null) item.setModifier(tuv_elem.getAttribute("changeid"));
|
||||
if (tuv_elem.getAttribute("creationid") != null) item.setCreator(tuv_elem.getAttribute("creationid"));*/
|
||||
// Get properties specified
|
||||
/* NodeList prop_list = tuv_elem.getElementsByTagName("prop");
|
||||
Hashtable lookups = null;
|
||||
for (int k=0; k < prop_list.getLength(); k++) {
|
||||
ElementImpl prop_elem = (ElementImpl)prop_list.item(k);
|
||||
String type = prop_elem.getAttribute("type");
|
||||
if (type != null && type.equals("x-Comment")) {
|
||||
// Get the comment
|
||||
prop_elem.normalize();
|
||||
text_list = prop_elem.getChildNodes();
|
||||
if (text_list.getLength() < 1) continue;
|
||||
text_elem = (TextImpl)text_list.item(0);
|
||||
String comment = text_elem.getNodeValue();
|
||||
if (comment != null && comment.length() > 0) item.setComment(comment);
|
||||
} else if (type != null && type.equals("x-Translated")) {
|
||||
// Get the translated flag value
|
||||
prop_elem.normalize();
|
||||
text_list = prop_elem.getChildNodes();
|
||||
if (text_list.getLength() < 1) continue;
|
||||
text_elem = (TextImpl)text_list.item(0);
|
||||
if (text_elem.getNodeValue() != null) {
|
||||
if (text_elem.getNodeValue().equalsIgnoreCase("true")) item.setTranslated(true);
|
||||
else if (text_elem.getNodeValue().equalsIgnoreCase("false")) item.setTranslated(false);
|
||||
else item.setTranslated(getDefaultTranslated());
|
||||
} else item.setTranslated(getDefaultTranslated());
|
||||
} else if (type != null && type.equals("x-Lookup")) {
|
||||
// Get a lookup value
|
||||
prop_elem.normalize();
|
||||
text_list = prop_elem.getChildNodes();
|
||||
if (text_list.getLength() < 1) continue;
|
||||
text_elem = (TextImpl)text_list.item(0);
|
||||
if (text_elem.getNodeValue() != null) {
|
||||
String text = text_elem.getNodeValue();
|
||||
if (text.indexOf("=") > 0) {
|
||||
try {
|
||||
if (lookups == null)
|
||||
lookups = new Hashtable();
|
||||
String lkey = text.substring(0,text.indexOf("="));
|
||||
String lvalue = text.substring(text.indexOf("=")+1,text.length());
|
||||
lookups.put(lkey, lvalue);
|
||||
} catch (Exception ex) {
|
||||
//String out of bounds - Ignore and go on
|
||||
}
|
||||
}
|
||||
} else item.setTranslated(getDefaultTranslated());
|
||||
}
|
||||
}*/
|
||||
// if (lookups != null)
|
||||
// item.setLookups(lookups);
|
||||
importResource(item, language, group);
|
||||
}
|
||||
private Vector getEncodingsVector(ElementImpl body) {
|
||||
String empty = "";
|
||||
if (body == null) return null;
|
||||
|
|
Loading…
Add table
Reference in a new issue