博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Java Program Mapping GB2312 to Unicode
阅读量:7263 次
发布时间:2019-06-29

本文共 4940 字,大约阅读时间需要 16 分钟。

In order to produce the mapping tables for this book, I wrote the following Java program, GB2312Unicode.java.

The output of this program is presented in the next 3 chapters.

/**
 * GB2312Unicode.java
 * Copyright (c) 1997-2003 by Dr. Herong Yang
 *
 * Writes a GB2312 -> Unicode mapping table to the file
 * "gb2312_unicode.gb". For every row/column (Qu-Wei) position of the
 * 94 x 94 GB2312 grid the table lists the position, the raw GB bytes,
 * the GB code in hex, the UTF-16BE code in hex and the UTF-8 code in hex.
 *
 * NOTE(review): this listing was recovered from a copy in which every piece
 * of text between a '<' and the following '>' had been stripped. The parts
 * that had to be reconstructed are marked with NOTE(review) below and should
 * be confirmed against the original program.
 */
import java.io.*;
import java.nio.*;
import java.nio.charset.*;

class GB2312Unicode {
    /** Destination of all table output; opened and closed by {@link #main}. */
    static OutputStream out = null;

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    static char hexDigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
                              '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /**
     * Inclusive lower bounds of the excluded position ranges, encoded as
     * row * 100 + column. Paired index-by-index with {@link #e_out}.
     */
    static int b_out[] = {201, 267, 279, 293, 484, 587, 625, 657, 734, 782, 827,
                          874, 901, 980, 5590};

    /** Inclusive upper bounds of the excluded position ranges (see {@link #b_out}). */
    static int e_out[] = {216, 268, 280, 294, 494, 594, 632, 694, 748, 794, 836,
                          894, 903, 994, 5594};

    /**
     * Creates "gb2312_unicode.gb" in the working directory and writes the
     * mapping table into it. I/O failures are reported on standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            out = new FileOutputStream("gb2312_unicode.gb");
            try {
                writeCode();
            } finally {
                // Close even when writeCode() fails so the file handle is not leaked.
                out.close();
            }
        } catch (IOException e) {
            System.out.println(e.toString());
        }
    }

    /**
     * Writes the complete table: one titled section per row 1..94, and for
     * every non-reserved row a two-entries-per-line listing of its 94 columns.
     * Finally prints the number of successfully decoded characters.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeCode() throws IOException {
        boolean reserved = false;
        String name = null;
        // GB2312 is not supported by JDK. So I am using GBK.
        CharsetDecoder gbdc = Charset.forName("GBK").newDecoder();
        CharsetEncoder uxec = Charset.forName("UTF-16BE").newEncoder();
        CharsetEncoder u8ec = Charset.forName("UTF-8").newEncoder();
        ByteBuffer gbbb = null;
        ByteBuffer uxbb = null;
        ByteBuffer u8bb = null;
        CharBuffer cb = null;
        int count = 0;
        for (int i = 1; i <= 94; i++) {
            // Defining row settings
            if (i >= 1 && i <= 9) {
                reserved = false;
                name = "Graphic symbols";
            } else if (i >= 10 && i <= 15) {
                reserved = true;
                name = "Reserved";
            } else if (i >= 16 && i <= 55) {
                reserved = false;
                name = "Level 1 characters";
            } else if (i >= 56 && i <= 87) {
                reserved = false;
                name = "Level 2 characters";
            } else if (i >= 88 && i <= 94) {
                reserved = true;
                name = "Reserved";
            }

            // writing row title
            writeln();
            // NOTE(review): the opening/closing tag literals were stripped from the
            // recovered listing; "<p>" / "</p>" are assumed - confirm.
            writeString("<p>");
            writeNumber(i);
            writeString(" Row: " + name);
            writeln();
            writeString("</p>");
            writeln();

            if (!reserved) {
                writeln();
                writeHeader();
                // looping through all characters in one row
                for (int j = 1; j <= 94; j++) {
                    byte hi = (byte) (0xA0 + i);
                    byte lo = (byte) (0xA0 + j);
                    if (validGB(i, j)) {
                        // getting GB, UTF-16BE, UTF-8 codes
                        gbbb = ByteBuffer.wrap(new byte[] {hi, lo});
                        try {
                            cb = gbdc.decode(gbbb);
                            uxbb = uxec.encode(cb);
                            // encode() consumed the buffer; rewind to encode it again
                            cb.rewind();
                            u8bb = u8ec.encode(cb);
                        } catch (CharacterCodingException e) {
                            // Not decodable: list the position as "null".
                            cb = null;
                            uxbb = null;
                            u8bb = null;
                        }
                    } else {
                        cb = null;
                        uxbb = null;
                        u8bb = null;
                    }

                    writeNumber(i);
                    writeNumber(j);
                    writeString(" ");
                    if (cb != null) {
                        writeByte(hi);
                        writeByte(lo);
                        writeString(" ");
                        writeHex(hi);
                        writeHex(lo);
                        count++;
                    } else {
                        writeGBSpace();
                        writeString(" null");
                    }
                    writeString(" ");
                    writeByteBuffer(uxbb, 2);
                    writeString(" ");
                    writeByteBuffer(u8bb, 3);

                    // Two entries per output line.
                    if (j % 2 == 0) {
                        writeln();
                    } else {
                        // NOTE(review): whitespace was collapsed in the recovered listing;
                        // three spaces are assumed because writeHeader() separates its
                        // two column groups with "   " - confirm.
                        writeString("   ");
                    }
                }
                writeFooter();
            }
        }
        System.out.println("Number of GB characters wrote: " + count);
    }

    /**
     * Writes a CR LF line terminator.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeln() throws IOException {
        out.write(0x0D);
        out.write(0x0A);
    }

    /**
     * Writes one raw byte.
     *
     * @param b the byte to write
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeByte(byte b) throws IOException {
        out.write(b & 0xFF);
    }

    /**
     * Writes the content of a buffer as hex digits, or the text "null" when
     * the buffer is absent, then pads the field to a width of {@code l} bytes
     * (two characters per byte).
     *
     * NOTE(review): everything after the start of the hex loop was missing
     * from the recovered listing; the loop bound and the padding loop are
     * reconstructed from the surviving "i = 2" assignment (the 4-character
     * "null" occupies two byte slots) and the column widths - confirm.
     *
     * @param b the buffer to dump, or {@code null}
     * @param l the field width in bytes
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeByteBuffer(ByteBuffer b, int l) throws IOException {
        int i = 0;
        if (b == null) {
            writeString("null");
            i = 2;
        } else {
            for (i = 0; i < b.limit(); i++) {
                writeHex(b.get(i));
            }
        }
        for (int j = i; j < l; j++) {
            writeString("  ");
        }
    }

    /**
     * Writes a two-byte blank that fills the width of one GB character.
     *
     * NOTE(review): this method's body was missing from the recovered
     * listing; the bytes 0xA1 0xA1 are assumed - confirm.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeGBSpace() throws IOException {
        out.write(0xA1);
        out.write(0xA1);
    }

    /**
     * Writes the low byte of every character of a string; {@code null} writes
     * nothing.
     *
     * NOTE(review): this method's body was missing from the recovered
     * listing and is reconstructed from its call sites - confirm.
     *
     * @param s the text to write, or {@code null}
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeString(String s) throws IOException {
        if (s != null) {
            for (int i = 0; i < s.length(); i++) {
                out.write(s.charAt(i) & 0xFF);
            }
        }
    }

    /**
     * Writes the last two decimal digits of a number, zero-padded.
     *
     * NOTE(review): this method's body was missing from the recovered
     * listing and is reconstructed from its call sites - confirm.
     *
     * @param i the number to write
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeNumber(int i) throws IOException {
        String s = "00" + String.valueOf(i);
        writeString(s.substring(s.length() - 2));
    }

    /**
     * Writes a byte as two upper-case hex digits, high nibble first.
     *
     * @param b the byte to write
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeHex(byte b) throws IOException {
        out.write((int) hexDigit[(b >> 4) & 0x0F]);
        out.write((int) hexDigit[b & 0x0F]);
    }

    /**
     * Writes the opening tag and the column captions for one row listing.
     *
     * NOTE(review): the tag literal was stripped from the recovered listing;
     * "<pre>" is assumed - confirm.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeHeader() throws IOException {
        writeString("<pre>");
        writeln();
        writeString("Q.W. ");
        writeGBSpace();
        writeString(" GB   Uni. UTF-8 ");
        writeString("   ");
        writeString("Q.W. ");
        writeGBSpace();
        writeString(" GB   Uni. UTF-8 ");
        writeln();
        writeln();
    }

    /**
     * Writes the closing tag of one row listing.
     *
     * NOTE(review): the tag literal was stripped from the recovered listing;
     * "</pre>" is assumed - confirm.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeFooter() throws IOException {
        writeString("</pre>");
        writeln();
    }

    /**
     * Tells whether a row/column position lies outside every excluded range
     * listed in {@link #b_out} / {@link #e_out}.
     *
     * @param i the row number
     * @param j the column number
     * @return {@code false} if {@code i*100+j} falls inside an excluded range,
     *         {@code true} otherwise
     */
    public static boolean validGB(int i, int j) {
        int code = i * 100 + j;
        for (int l = 0; l < b_out.length; l++) {
            if (code >= b_out[l] && code <= e_out[l]) {
                return false;
            }
        }
        return true;
    }
}

转载地址:http://umddm.baihongyu.com/

你可能感兴趣的文章
MAMP和WAMP搭建Web环境,数据库,数据分布可视化
查看>>
Base64加密解密工具类
查看>>
Nginx的介绍以及编译安装详解
查看>>
WebSocket 学习--用nodejs搭建服务器
查看>>
str_repeat() 函数把字符串重复指定的次数。
查看>>
iOS 统计Xcode整个工程的代码行数
查看>>
react native android6+拍照闪退或重启的解决方案
查看>>
ASP.NET Core 企业级开发架构简介及框架汇总
查看>>
Mysql数据库再度使用
查看>>
链接器符号解析算法小解以及静态库链接顺序等等问题
查看>>
mosquitto安装和测试
查看>>
解决nginx负载均衡的session共享问题
查看>>
Markdown---语法小记
查看>>
配置https
查看>>
SQL Server 权限管理
查看>>
metasploit下Windows下多种提权方式
查看>>
Redis进阶实践之十一 Redis的Cluster集群搭建
查看>>
个人成长:目标、输入、输出
查看>>
创建cordova项目
查看>>
react 项目实战(十)引入AntDesign组件库
查看>>