diff --git a/icu4j/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java b/icu4j/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java new file mode 100644 index 00000000000..572e5c4eb6b --- /dev/null +++ b/icu4j/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java @@ -0,0 +1,346 @@ +/* + ************************************************************************** + * Copyright (C) 2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ************************************************************************** + * + */ + +package com.ibm.icu.dev.demo.charsetdet; + +import java.awt.event.*; +import java.awt.*; +import java.io.*; +import java.net.URL; + +import javax.swing.*; + +import com.ibm.icu.text.CharsetDetector; +import com.ibm.icu.text.CharsetMatch; + +/** + * This simple application demonstrates how to use the CharsetDetector API. It + * opens a file or web page, detects the encoding, and then displays it using that + * encoding. + */ +public class DetectingViewer extends JFrame implements ActionListener +{ + + private JTextPane text; + private JFileChooser fileChooser; + + /** + * @throws java.awt.HeadlessException + */ + public DetectingViewer() + { + super(); + + fileChooser = new JFileChooser(); + + setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + setSize(800, 800); + + setJMenuBar(makeMenus()); + text = new JTextPane(); + text.setContentType("text/plain"); + text.setText(""); + text.setSize(800, 800); + + Font font = new Font("Arial Unicode MS", Font.PLAIN, 24); + text.setFont(font); + + JScrollPane scrollPane = new JScrollPane(text); + + getContentPane().add(scrollPane); + setVisible(true); + } + + public void actionPerformed(ActionEvent event) + { + String cmd = event.getActionCommand(); + + if (cmd.equals("New...")) { + doNew(); + } else if (cmd.equals("Open File...")) { + doOpenFile(); + } else if (cmd.equals("Open URL...")) { + doOpenURL(); + } else if (cmd.equals("Quit")) { + doQuit(); + } + } + + public static void main(String[] args) + { + new DetectingViewer(); + } + + private void errorDialog(String title, String msg) + { + JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE); + } + + private BufferedInputStream openFile(File file) + { + FileInputStream fileStream = null; + + try { + fileStream = new FileInputStream(file); + } catch (Exception e) { + errorDialog("Error Opening File", e.getMessage()); + return null; + } + + return new BufferedInputStream(fileStream); + } + + private void openFile(String directory, String filename) + { + openFile(new File(directory, filename)); + } + + + private BufferedInputStream openURL(String url) + { + InputStream s = null; + + try { + URL aURL = new URL(url); + s = aURL.openStream(); + } catch (Exception e) { + errorDialog("Error Opening URL", e.getMessage()); + return null; + } + + return new BufferedInputStream(s); + } + + private String encodingName(CharsetMatch match) + { + return match.getName() + " (" + match.getLanguage() + ")"; + } + + private void setMatchMenu(CharsetMatch[] matches) + { + JMenu menu = getJMenuBar().getMenu(1); + JMenuItem menuItem; + + menu.removeAll(); + + for (int i = 0; i < matches.length; i += 1) { + CharsetMatch match = matches[i]; + + menuItem = new JMenuItem(encodingName(match) + " " + match.getConfidence()); + + menu.add(menuItem); + } + } + + private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'}; + private byte[] styleTag = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'}; + private static int BUFFER_SIZE = 100000; + + private boolean openTag(byte[] buffer, int offset, int length, byte[] tag) + { + int tagLen = tag.length; + int bufRem = length - offset; + int b; + + for (b = 0; b < tagLen && b < bufRem; b += 1) { + if (buffer[b + offset] != tag[b]) { + return false; + } + } + + return b == tagLen; + } + + private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag) + { + if (buffer[offset] != (byte) '/') { + return false; + } + + return openTag(buffer, offset + 1, length, tag); + } + + private byte[] filter(InputStream in) + { + byte[] buffer = new byte[BUFFER_SIZE]; + int bytesRemaining = BUFFER_SIZE; + int bufLen = 0; + + in.mark(BUFFER_SIZE); + + try { + while (bytesRemaining > 0) { + int bytesRead = in.read(buffer, bufLen, bytesRemaining); + + if (bytesRead <= 0) { + break; + } + + bufLen += bytesRead; + bytesRemaining -= bytesRead; + } + } catch (Exception e) { + // TODO: error handling? + return null; + } + + boolean inTag = false; + boolean skip = false; + int out = 0; + + for (int i = 0; i < bufLen; i += 1) { + byte b = buffer[i]; + + if (b == (byte) '<') { + inTag = true; + + if (openTag(buffer, i + 1, bufLen, scriptTag) || + openTag(buffer, i + 1, bufLen, styleTag)) { + skip = true; + } else if (closedTag(buffer, i + 1, bufLen, scriptTag) || + closedTag(buffer, i + 1, bufLen, styleTag)) { + skip = false; + } + } else if (b == (byte) '>') { + inTag = false; + } else if (! (inTag || skip)) { + buffer[out++] = b; + } + } + + byte[] filtered = new byte[out]; + + System.arraycopy(buffer, 0, filtered, 0, out); + return filtered; + } + + private CharsetMatch[] detect(byte[] bytes) + { + CharsetDetector det = new CharsetDetector(); + + det.setText(bytes); + + return det.detectAll(); + } + + private CharsetMatch[] detect(BufferedInputStream inputStream) + { + CharsetDetector det = new CharsetDetector(); + + try { + det.setText(inputStream); + + return det.detectAll(); + } catch (Exception e) { + // TODO: error message? + return null; + } + } + + private void show(InputStream inputStream, CharsetMatch[] matches, String title) + { + InputStreamReader isr; + char[] buffer = new char[1024]; + int bytesRead = 0; + + try { + StringBuffer sb = new StringBuffer(); + + inputStream.reset(); + + isr = new InputStreamReader(inputStream, matches[0].getName()); + + while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) { + sb.append(buffer, 0, bytesRead); + } + + this.setTitle(title + " - " + encodingName(matches[0])); + + setMatchMenu(matches); + text.setText(sb.toString()); + + isr.close(); + } catch (IOException e) { + errorDialog("IO Error", e.getMessage()); + } catch (Exception e) { + errorDialog("Internal Error", e.getMessage()); + } + } + + private void doNew() + { + // open a new window... + } + + private void doOpenFile() + { + int retVal = fileChooser.showOpenDialog(this); + + if (retVal == JFileChooser.APPROVE_OPTION) { + File file = fileChooser.getSelectedFile(); + BufferedInputStream inputStream = openFile(file); + + if (inputStream != null) { + CharsetMatch[] matches = detect(inputStream); + + show(inputStream, matches, file.getName()); + } + } + } + + private void doOpenURL() + { + String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE, + null, null, null); + + if (url != null && url.length() > 0) { + BufferedInputStream inputStream = openURL(url); + + if (inputStream != null) { + byte[] filtered = filter(inputStream); + CharsetMatch[] matches = detect(filtered); + + show(inputStream, matches, url); + } + } +} + + private void doQuit() + { + System.exit(0); + } + + private JMenuBar makeMenus() + { + JMenu menu = new JMenu("File"); + JMenuItem mi; + + mi = new JMenuItem("Open File..."); + mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK))); + mi.addActionListener(this); + menu.add(mi); + + mi = new JMenuItem("Open URL..."); + mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK))); + mi.addActionListener(this); + menu.add(mi); + + mi = new JMenuItem("Quit"); + mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK))); + mi.addActionListener(this); + menu.add(mi); + + JMenuBar mbar = new JMenuBar(); + mbar.add(menu); + + menu = new JMenu("Detected Encodings"); + mbar.add(menu); + + return mbar; + } +}