前言
一个正确的Class文件由以下部分组成:

魔数(magic u4):固定为 CAFE BABE(16进制),这是class文件的固定标志,表明此文件是Java的class
版本号(minor_version u2 major_version u2):前后两个字节分别为minor version和major version
常量池个数(constant_pool_count u2):值得注意的是,此处的个数比实际数量多1,因为第0号常量池被JVM内部占用,因此class的常量编号从1开始
常量池(constant_pool 表):常量池的内容
类结构,包括:access_flags this_class super_class interfaces_count interfaces[] fields_count fields[] methods_count methods[] attributes_count attributes[] 等等
接下来,我将通过编写一个简单的class文件解析器来帮助理解class文件的格式
package parser;
import java.io.*;
/**
 * First step of the walkthrough: reads the fixed header of a class file
 * (magic number, minor/major version, constant-pool count) and prints it.
 *
 * <p>The class file path is taken from the first command-line argument. When no
 * argument is given, the previously hard-coded path is used.
 */
public class Main {
    /** Class file that is read when no path is passed on the command line. */
    private static final String DEFAULT_CLASS_FILE = "E:\\Code\\Java\\JDKLearn\\src\\Main.class";

    /**
     * Prints the header fields of a class file.
     *
     * @param args optional; {@code args[0]} is the path of the class file to read
     * @throws UncheckedIOException if the file cannot be read or does not start with 0xCAFEBABE
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_CLASS_FILE;
        // Buffer the file: the header is read through many one- to four-byte reads.
        try (DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(path)))) {
            // magic (u4)
            int magic = dis.readInt();
            System.out.printf("Magic: 0x%08X%n", magic);
            if (magic != 0xCAFEBABE) {
                throw new IOException("Not a valid class file");
            }
            // minor_version (u2) comes first in the file, then major_version (u2)
            int minorVersion = dis.readUnsignedShort();
            int majorVersion = dis.readUnsignedShort();
            System.out.printf("Version: %d.%d%n", majorVersion, minorVersion);
            // constant_pool_count (u2)
            int constantPoolCount = dis.readUnsignedShort();
            // NOTE(review): no ConstantPool class is defined in this article; the later listing
            // uses ConstantPoolParser instead - confirm which type is intended here.
            ConstantPool constantPool = new ConstantPool(constantPoolCount, dis);
            System.out.println("Constant pool count: " + constantPoolCount);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read class file: " + path, e);
        }
    }
}
这个简单的示例演示了如何读取类的一些固定信息,下面,我将开始对常量池进行解析。
一. 解析常量池
首先,我们需要知道常量池里的常量有哪些类型
常量池由两个部分组成,即
cp_info {
u1 tag;
u1 info[];
}
这里的tag决定了常量的类型,而info[]则是该常量的具体内容
总的来说,常量分为字面量和符号引用两种大类,而其中又分为若干种

字面量有:CONSTANT_Integer CONSTANT_Float CONSTANT_Long CONSTANT_Double CONSTANT_String CONSTANT_Utf8
而其他的,如CONSTANT_Class等则是符号引用
知道了存储结构后,我们则可根据官方文档给出的内容来逐步解析常量池(4.4.1 - 4.4.10)
package parser;
import java.io.DataInputStream;
import java.util.LinkedList;
/**
 * Reads the constant pool of a class file from a {@link DataInputStream} positioned
 * right after the {@code constant_pool_count} item, and renders the entries as text.
 *
 * <p>Entries are stored in {@link #constantPool} under their class-file index: slot 0 is
 * {@code null}, and the slot following a Long or Double entry is {@code null} as well.
 */
public class ConstantPoolParser {
    // Tag values, Table 4.4-A https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.4
    private static final int CONSTANT_Utf8 = 1;
    private static final int CONSTANT_Integer = 3;
    private static final int CONSTANT_Float = 4;
    private static final int CONSTANT_Long = 5;
    private static final int CONSTANT_Double = 6;
    private static final int CONSTANT_Class = 7;
    private static final int CONSTANT_String = 8;
    private static final int CONSTANT_Fieldref = 9;
    private static final int CONSTANT_Methodref = 10;
    private static final int CONSTANT_InterfaceMethodref = 11;
    private static final int CONSTANT_NameAndType = 12;
    private static final int CONSTANT_MethodHandle = 15;
    private static final int CONSTANT_MethodType = 16;
    private static final int CONSTANT_InvokeDynamic = 18;

    // In-memory model of the constant pool
    private final int constantPoolCount;
    private final DataInputStream dis;
    public final LinkedList<cpInfo> constantPool = new LinkedList<>();

    /**
     * @param constantPoolCount the {@code constant_pool_count} value read from the class file
     * @param dis               stream the entries are read from
     */
    public ConstantPoolParser(int constantPoolCount, DataInputStream dis) {
        this.constantPoolCount = constantPoolCount;
        this.dis = dis;
    }

    /**
     * Reads entries 1 .. constantPoolCount-1 from the stream into {@link #constantPool}.
     *
     * @throws java.io.UncheckedIOException if the stream ends early or cannot be read
     * @throws IllegalStateException        if an entry carries a tag this parser does not know
     */
    public void parse() {
        // Index 0 is not a usable entry; keep a placeholder so list positions equal pool indices.
        constantPool.add(null);
        try {
            for (int i = 1; i < constantPoolCount; i++) {
                int tag = dis.readUnsignedByte();
                constantPool.add(readEntry(tag, i));
                if (tag == CONSTANT_Long || tag == CONSTANT_Double) {
                    // Long/Double occupy two constant-pool slots; the second one is unusable.
                    constantPool.add(null);
                    i++;
                }
            }
        } catch (java.io.IOException e) {
            // Continuing after a failed read would leave every later index misaligned.
            throw new java.io.UncheckedIOException("Failed to read constant pool", e);
        }
    }

    /** Reads the payload that follows an already-consumed tag byte and builds the matching entry. */
    private cpInfo readEntry(int tag, int index) throws java.io.IOException {
        return switch (tag) {
            // readUTF consumes the u2 length and decodes modified UTF-8, the encoding class files use.
            case CONSTANT_Utf8 -> new UtfInfo(tag, dis.readUTF());
            case CONSTANT_Integer -> new IntegerInfo(tag, dis.readInt());
            case CONSTANT_Float -> new FloatInfo(tag, dis.readFloat());
            case CONSTANT_Long -> new LongInfo(tag, dis.readLong());
            case CONSTANT_Double -> new DoubleInfo(tag, dis.readDouble());
            case CONSTANT_Class -> new ClassInfo(tag, dis.readUnsignedShort());
            case CONSTANT_String -> new StringInfo(tag, dis.readUnsignedShort());
            case CONSTANT_Fieldref ->
                    new FieldRefInfo(tag, dis.readUnsignedShort(), dis.readUnsignedShort());
            case CONSTANT_Methodref ->
                    new MethodRefInfo(tag, dis.readUnsignedShort(), dis.readUnsignedShort());
            case CONSTANT_InterfaceMethodref ->
                    new InterfaceMethodRefInfo(tag, dis.readUnsignedShort(), dis.readUnsignedShort());
            case CONSTANT_NameAndType ->
                    new NameAndTypeInfo(tag, dis.readUnsignedShort(), dis.readUnsignedShort());
            case CONSTANT_MethodHandle ->
                    new MethodHandleInfo(tag, dis.readUnsignedByte(), dis.readUnsignedShort());
            case CONSTANT_MethodType -> new MethodTypeInfo(tag, dis.readUnsignedShort());
            case CONSTANT_InvokeDynamic ->
                    new InvokeDynamicInfo(tag, dis.readUnsignedShort(), dis.readUnsignedShort());
            // The payload size of an unknown tag is unknown, so the stream cannot be resynchronised.
            default -> throw new IllegalStateException(
                    "Unknown constant pool tag " + tag + " at index " + index);
        };
    }

    /** Prints every usable entry as {@code #index = description} to standard output. */
    public void printConstantPool() {
        int index = 0;
        // Iterate instead of calling get(i): indexed access on a LinkedList is linear per call.
        for (cpInfo c : constantPool) {
            // Skip slot 0 and the unusable slot after a long/double.
            if (index > 0 && c != null) {
                System.out.printf("#%-3d = %s%n", index, c.toString(this));
            }
            index++;
        }
    }

    /**
     * Returns the text of the Utf8 entry at {@code index}.
     *
     * @param index constant-pool index
     * @return the string value, or {@code "#" + index} when the index is out of range or the
     *         entry is not a Utf8 entry
     */
    public String getUtf8(int index) {
        if (index < 0 || index >= constantPool.size()) {
            return "#" + index;
        }
        cpInfo c = constantPool.get(index);
        if (c instanceof UtfInfo u) return u.value;
        return "#" + index;
    }

    /** Shared rendering for Fieldref / Methodref / InterfaceMethodref: {@code label Class.name:descriptor}. */
    private static String describeMember(ConstantPoolParser pool, String label,
                                         int classIndex, int nameAndTypeIndex) {
        ClassInfo cls = (ClassInfo) pool.constantPool.get(classIndex);
        NameAndTypeInfo nt = (NameAndTypeInfo) pool.constantPool.get(nameAndTypeIndex);
        return label + " " + pool.getUtf8(cls.name_index)
                + "." + pool.getUtf8(nt.name_index)
                + ":" + pool.getUtf8(nt.descriptor_index);
    }

    /** Base type of all constant-pool entries; {@code tag} is the entry's tag byte. */
    static abstract class cpInfo {
        int tag;

        /** Renders this entry, resolving referenced indices through {@code pool}. */
        abstract String toString(ConstantPoolParser pool);
    }

    static class UtfInfo extends cpInfo {
        String value;

        UtfInfo(int tag, String value) {
            this.tag = tag;
            this.value = value;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "Utf8 \"" + value + "\"";
        }
    }

    static class IntegerInfo extends cpInfo {
        int value;

        IntegerInfo(int tag, int value) {
            this.tag = tag;
            this.value = value;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "Integer " + value;
        }
    }

    static class FloatInfo extends cpInfo {
        float value;

        FloatInfo(int tag, float value) {
            this.tag = tag;
            this.value = value;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "Float " + value;
        }
    }

    static class LongInfo extends cpInfo {
        long value;

        LongInfo(int tag, long value) {
            this.tag = tag;
            this.value = value;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "Long " + value;
        }
    }

    static class DoubleInfo extends cpInfo {
        double value;

        DoubleInfo(int tag, double value) {
            this.tag = tag;
            this.value = value;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "Double " + value;
        }
    }

    static class ClassInfo extends cpInfo {
        int name_index;

        ClassInfo(int tag, int name_index) {
            this.tag = tag;
            this.name_index = name_index;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "Class " + pool.getUtf8(name_index);
        }
    }

    static class StringInfo extends cpInfo {
        int string_index;

        StringInfo(int tag, int string_index) {
            this.tag = tag;
            this.string_index = string_index;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "String \"" + pool.getUtf8(string_index) + "\"";
        }
    }

    static class NameAndTypeInfo extends cpInfo {
        int name_index, descriptor_index;

        NameAndTypeInfo(int tag, int n, int d) {
            this.tag = tag;
            this.name_index = n;
            this.descriptor_index = d;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return pool.getUtf8(name_index) + ":" + pool.getUtf8(descriptor_index);
        }
    }

    static class FieldRefInfo extends cpInfo {
        int class_index, name_and_type_index;

        FieldRefInfo(int tag, int c, int n) {
            this.tag = tag;
            this.class_index = c;
            this.name_and_type_index = n;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return describeMember(pool, "Fieldref", class_index, name_and_type_index);
        }
    }

    static class MethodRefInfo extends cpInfo {
        int class_index, name_and_type_index;

        MethodRefInfo(int tag, int c, int n) {
            this.tag = tag;
            this.class_index = c;
            this.name_and_type_index = n;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return describeMember(pool, "Methodref", class_index, name_and_type_index);
        }
    }

    static class InterfaceMethodRefInfo extends cpInfo {
        int class_index, name_and_type_index;

        InterfaceMethodRefInfo(int tag, int c, int n) {
            this.tag = tag;
            this.class_index = c;
            this.name_and_type_index = n;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return describeMember(pool, "InterfaceMethodref", class_index, name_and_type_index);
        }
    }

    static class MethodHandleInfo extends cpInfo {
        int reference_kind, reference_index;

        MethodHandleInfo(int tag, int kind, int index) {
            this.tag = tag;
            this.reference_kind = kind;
            this.reference_index = index;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "MethodHandle kind=" + reference_kind + " ref=" + pool.constantPool.get(reference_index).toString(pool);
        }
    }

    static class MethodTypeInfo extends cpInfo {
        int descriptor_index;

        MethodTypeInfo(int tag, int descriptor_index) {
            this.tag = tag;
            this.descriptor_index = descriptor_index;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            return "MethodType " + pool.getUtf8(descriptor_index);
        }
    }

    static class InvokeDynamicInfo extends cpInfo {
        int bootstrap_method_attr_index, name_and_type_index;

        InvokeDynamicInfo(int tag, int b, int n) {
            this.tag = tag;
            this.bootstrap_method_attr_index = b;
            this.name_and_type_index = n;
        }

        @Override
        String toString(ConstantPoolParser pool) {
            NameAndTypeInfo nt = (NameAndTypeInfo) pool.constantPool.get(name_and_type_index);
            return "InvokeDynamic #" + bootstrap_method_attr_index + " " +
                    pool.getUtf8(nt.name_index) + ":" + pool.getUtf8(nt.descriptor_index);
        }
    }
}
// Test driver class:
package parser;
import java.io.*;
/**
 * Reads the header of a class file, then parses and prints its constant pool.
 *
 * <p>The class file path is taken from the first command-line argument. When no
 * argument is given, the previously hard-coded path is used.
 */
public class Main {
    /** Class file that is read when no path is passed on the command line. */
    private static final String DEFAULT_CLASS_FILE = "E:\\Code\\Java\\JDKLearn\\src\\Main.class";

    /**
     * Prints magic, version, constant-pool count and the constant-pool entries.
     *
     * @param args optional; {@code args[0]} is the path of the class file to read
     * @throws UncheckedIOException if the file cannot be read or does not start with 0xCAFEBABE
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_CLASS_FILE;
        // Buffer the file: parsing issues many one- to eight-byte reads.
        try (DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(path)))) {
            // magic (u4)
            int magic = dis.readInt();
            System.out.printf("Magic: 0x%08X%n", magic);
            if (magic != 0xCAFEBABE) {
                throw new IOException("Not a valid class file");
            }
            // minor_version (u2) comes first in the file, then major_version (u2)
            int minorVersion = dis.readUnsignedShort();
            int majorVersion = dis.readUnsignedShort();
            System.out.printf("Version: %d.%d%n", majorVersion, minorVersion);
            // constant_pool_count (u2)
            int constantPoolCount = dis.readUnsignedShort();
            ConstantPoolParser constantPoolParser = new ConstantPoolParser(constantPoolCount, dis);
            System.out.println("Constant pool count: " + constantPoolCount);
            constantPoolParser.parse();
            constantPoolParser.printConstantPool();
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read class file: " + path, e);
        }
    }
}
运行结果:

二. 解析类属性
常量池解析完之后,我们再来回头看这张图:

接下来,我们就可以去解析 access_flags、this_class、super_class、interfaces_count、interfaces[]、fields_count、fields[]、methods_count、methods[]、attributes_count、attributes[] 这些元素了
我们先简单解析一下较为简单的类属性:
首先是access_flags
在这个例子里,它是00 21 ,那么,这代表什么?
查阅手册得知:

可以得知,access_flags是一个Bitmask(位掩码)
什么是位掩码?简而言之,就是使用二进制的不同位(bit)来表示不同的状态。
具体来说:0x0021 = 0b0000_0000_0010_0001
可以发现,它的第一位和第六位被设置为1。此时参考官方文档,可以得知,它代表两个flag:
ACC_PUBLIC 0x0001 -> 0b0000_0000_0000_0001
ACC_SUPER 0x0020 -> 0b0000_0000_0010_0000
让我们来编写一个工具类来解析这个位掩码(为了方便,这里也一并将相似的Field和Method AccessFlags写了)
package utils;
import java.util.ArrayList;
import java.util.List;
/**
 * Decodes the {@code access_flags} bitmask of classes, fields and methods into a
 * space-separated list of modifier names.
 *
 * <p>The same bit can mean different things depending on where it appears
 * (for example 0x0020 is ACC_SUPER on a class and ACC_SYNCHRONIZED on a method),
 * which is why there is one decoder per kind.
 */
public class AccessFlagUtils {
    // Class flags: Table 4.1-A. Class access and property modifiers
    public static final int ACC_PUBLIC = 0x0001;
    public static final int ACC_FINAL = 0x0010;
    public static final int ACC_SUPER = 0x0020;
    public static final int ACC_INTERFACE = 0x0200;
    public static final int ACC_ABSTRACT = 0x0400;
    public static final int ACC_SYNTHETIC = 0x1000;
    public static final int ACC_ANNOTATION = 0x2000;
    public static final int ACC_ENUM = 0x4000;
    // Added to Table 4.1-A in Java SE 9 (module-info.class)
    public static final int ACC_MODULE = 0x8000;
    // Field flags: Table 4.5-A. Field access and property flags
    public static final int ACC_PRIVATE = 0x0002;
    public static final int ACC_PROTECTED = 0x0004;
    public static final int ACC_STATIC = 0x0008;
    public static final int ACC_VOLATILE = 0x0040;
    public static final int ACC_TRANSIENT = 0x0080;
    // Method flags: Table 4.6-A. Method access and property flags
    public static final int ACC_SYNCHRONIZED = 0x0020;
    public static final int ACC_BRIDGE = 0x0040;
    public static final int ACC_VARARGS = 0x0080;
    public static final int ACC_NATIVE = 0x0100;
    public static final int ACC_STRICT = 0x0800;

    /**
     * Decodes class-level access flags.
     *
     * @param flags the {@code access_flags} item of the ClassFile structure
     * @return modifier names separated by single spaces; empty string when no known bit is set
     */
    public static String decodeClassAccessFlags(int flags) {
        List<String> list = new ArrayList<>();
        addIfSet(list, flags, ACC_PUBLIC, "public");
        addIfSet(list, flags, ACC_FINAL, "final");
        addIfSet(list, flags, ACC_SUPER, "super");
        addIfSet(list, flags, ACC_INTERFACE, "interface");
        addIfSet(list, flags, ACC_ABSTRACT, "abstract");
        addIfSet(list, flags, ACC_SYNTHETIC, "synthetic");
        addIfSet(list, flags, ACC_ANNOTATION, "annotation");
        addIfSet(list, flags, ACC_ENUM, "enum");
        addIfSet(list, flags, ACC_MODULE, "module");
        return String.join(" ", list);
    }

    /**
     * Decodes field access flags.
     *
     * @param flags the {@code access_flags} item of a field_info structure
     * @return modifier names separated by single spaces; empty string when no known bit is set
     */
    public static String decodeFieldAccessFlags(int flags) {
        List<String> list = new ArrayList<>();
        addIfSet(list, flags, ACC_PUBLIC, "public");
        addIfSet(list, flags, ACC_PRIVATE, "private");
        addIfSet(list, flags, ACC_PROTECTED, "protected");
        addIfSet(list, flags, ACC_STATIC, "static");
        addIfSet(list, flags, ACC_FINAL, "final");
        addIfSet(list, flags, ACC_VOLATILE, "volatile");
        addIfSet(list, flags, ACC_TRANSIENT, "transient");
        addIfSet(list, flags, ACC_SYNTHETIC, "synthetic");
        addIfSet(list, flags, ACC_ENUM, "enum");
        return String.join(" ", list);
    }

    /**
     * Decodes method access flags, including the bridge and varargs bits.
     *
     * @param flags the {@code access_flags} item of a method_info structure
     * @return modifier names separated by single spaces; empty string when no known bit is set
     */
    public static String decodeMethodAccessFlags(int flags) {
        List<String> list = new ArrayList<>();
        addIfSet(list, flags, ACC_PUBLIC, "public");
        addIfSet(list, flags, ACC_PRIVATE, "private");
        addIfSet(list, flags, ACC_PROTECTED, "protected");
        addIfSet(list, flags, ACC_STATIC, "static");
        addIfSet(list, flags, ACC_FINAL, "final");
        addIfSet(list, flags, ACC_SYNCHRONIZED, "synchronized");
        addIfSet(list, flags, ACC_BRIDGE, "bridge");
        addIfSet(list, flags, ACC_VARARGS, "varargs");
        addIfSet(list, flags, ACC_NATIVE, "native");
        addIfSet(list, flags, ACC_ABSTRACT, "abstract");
        addIfSet(list, flags, ACC_STRICT, "strictfp");
        addIfSet(list, flags, ACC_SYNTHETIC, "synthetic");
        return String.join(" ", list);
    }

    /** Appends {@code name} to {@code names} when any bit of {@code mask} is set in {@code flags}. */
    private static void addIfSet(List<String> names, int flags, int mask, String name) {
        if ((flags & mask) != 0) {
            names.add(name);
        }
    }
}
// The result is shown below.


然后是this、super这两个属性,很简单,代码如下:
// access_flags (u2): bitmask decoded by AccessFlagUtils
int accessFlags = dis.readUnsignedShort();
String flags = AccessFlagUtils.decodeClassAccessFlags(accessFlags);
System.out.println("Class Access flags: " + flags);
// this_class (u2): index of a CONSTANT_Class entry whose name_index points at the class name
int thisClass = dis.readUnsignedShort();
String thisClassName = constantPoolParser.getUtf8(((ConstantPoolParser.ClassInfo) constantPoolParser.constantPool.get(thisClass)).name_index);
System.out.println("This class: " + thisClassName);
// super_class (u2): the spec allows 0 here (java.lang.Object has no superclass);
// slot 0 of the pool is null, so index 0 must not be dereferenced.
int superClass = dis.readUnsignedShort();
String superClassName = superClass == 0
        ? "<none>"
        : constantPoolParser.getUtf8(((ConstantPoolParser.ClassInfo) constantPoolParser.constantPool.get(superClass)).name_index);
System.out.println("Super class: " + superClassName);
三. 解析接口列表
interfaces[]
Each value in the interfaces array must be a valid index into the constant_pool table. The constant_pool entry at each value of interfaces[i], where 0 ≤ i < interfaces_count, must be a CONSTANT_Class_info structure representing an interface that is a direct superinterface of this class or interface type, in the left-to-right order given in the source for the type.
根据官方文档的定义,我们可以很容易地写出对应的解析代码。
而这里的interfaces,其实就是class implements 的接口类列表
代码如下:
// interfaces: a u2 count followed by that many u2 indices of CONSTANT_Class entries
int interfacesCount = dis.readUnsignedShort();
System.out.println("Interfaces count: " + interfacesCount);
for (int slot = 0; slot < interfacesCount; slot++) {
    int interfaceIndex = dis.readUnsignedShort();
    // Resolve Class entry -> name_index -> Utf8 text
    ConstantPoolParser.ClassInfo interfaceInfo =
            (ConstantPoolParser.ClassInfo) constantPoolParser.constantPool.get(interfaceIndex);
    String interfaceName = constantPoolParser.getUtf8(interfaceInfo.name_index);
    System.out.println("Interface: " + interfaceName);
}
四. 解析字段列表
fields[]
Each value in the fields table must be a field_info structure (§4.5) giving a complete description of a field in this class or interface. The fields table includes only those fields that are declared by this class or interface. It does not include items representing fields that are inherited from superclasses or superinterfaces.
field_info {
u2 access_flags;
u2 name_index;
u2 descriptor_index;
u2 attributes_count;
attribute_info attributes[attributes_count];
}
attribute_info {
u2 attribute_name_index;
u4 attribute_length;
u1 info[attribute_length];
}
根据官方文档的定义,我们可以很容易地写出对应的解析代码。
而这里的fields,其实就是这个类的成员变量列表
代码如下(涉及attributes表的部分暂时跳过):
// fields: a u2 count followed by that many field_info structures
int fieldsCount = dis.readUnsignedShort();
System.out.println("Fields count: " + fieldsCount);
for (int i = 0; i < fieldsCount; i++) {
    int field_accessFlags = dis.readUnsignedShort();
    // Decode this field's own flags, not the class-level accessFlags read earlier.
    String field_flags = AccessFlagUtils.decodeFieldAccessFlags(field_accessFlags);
    System.out.println("Field Access flags: " + field_flags);
    int field_nameIndex = dis.readUnsignedShort();
    String field_name = constantPoolParser.getUtf8(field_nameIndex);
    System.out.println("Field name: " + field_name);
    int field_descriptorIndex = dis.readUnsignedShort();
    String field_descriptor = constantPoolParser.getUtf8(field_descriptorIndex);
    System.out.println("Field descriptor: " + field_descriptor);
    int field_attributesCount = dis.readUnsignedShort();
    System.out.println("Field attributes count: " + field_attributesCount);
    for (int j = 0; j < field_attributesCount; j++) {
        int field_attributeNameIndex = dis.readUnsignedShort();
        String field_attributeName = constantPoolParser.getUtf8(field_attributeNameIndex);
        System.out.println("Field attribute name: " + field_attributeName);
        // attribute_length is a u4; widen it so values above Integer.MAX_VALUE do not turn negative.
        long field_attributeLength = Integer.toUnsignedLong(dis.readInt());
        System.out.println("Field attribute length: " + field_attributeLength);
        // Attribute contents are not interpreted yet. skipNBytes skips exactly the requested
        // number of bytes or throws, whereas skipBytes may skip fewer without reporting it.
        dis.skipNBytes(field_attributeLength);
        System.out.println("Skipped " + field_attributeLength + " bytes");
    }
}
五. 解析方法列表
methods[]
Each value in the methods table must be a method_info structure (§4.6) giving a complete description of a method in this class or interface. If neither of the ACC_NATIVE and ACC_ABSTRACT flags are set in the access_flags item of a method_info structure, the Java Virtual Machine instructions implementing the method are also supplied.
The method_info structures represent all methods declared by this class or interface type, including instance methods, class methods, instance initialization methods (§2.9), and any class or interface initialization method (§2.9). The methods table does not include items representing methods that are inherited from superclasses or superinterfaces.
method_info {
u2 access_flags;
u2 name_index;
u2 descriptor_index;
u2 attributes_count;
attribute_info attributes[attributes_count];
}
attribute_info {
u2 attribute_name_index;
u4 attribute_length;
u1 info[attribute_length];
}
同样的,根据文档编写对应代码
// methods: a u2 count followed by that many method_info structures
int methodsCount = dis.readUnsignedShort();
System.out.println("Methods count: " + methodsCount);
for (int i = 0; i < methodsCount; i++) {
    int method_accessFlags = dis.readUnsignedShort();
    String method_flags = AccessFlagUtils.decodeMethodAccessFlags(method_accessFlags);
    System.out.println("Method Access flags: " + method_flags);
    int method_nameIndex = dis.readUnsignedShort();
    String method_name = constantPoolParser.getUtf8(method_nameIndex);
    System.out.println("Method name: " + method_name);
    int method_descriptorIndex = dis.readUnsignedShort();
    String method_descriptor = constantPoolParser.getUtf8(method_descriptorIndex);
    System.out.println("Method descriptor: " + method_descriptor);
    int method_attributesCount = dis.readUnsignedShort();
    System.out.println("Method attributes count: " + method_attributesCount);
    for (int j = 0; j < method_attributesCount; j++) {
        int method_attributeNameIndex = dis.readUnsignedShort();
        String method_attributeName = constantPoolParser.getUtf8(method_attributeNameIndex);
        System.out.println("Method attribute name: " + method_attributeName);
        // attribute_length is a u4; widen it so values above Integer.MAX_VALUE do not turn negative.
        long method_attributeLength = Integer.toUnsignedLong(dis.readInt());
        System.out.println("Method attribute length: " + method_attributeLength);
        // Attribute contents are not interpreted yet. skipNBytes skips exactly the requested
        // number of bytes or throws, whereas skipBytes may skip fewer without reporting it.
        dis.skipNBytes(method_attributeLength);
        System.out.println("Skipped " + method_attributeLength + " bytes");
    }
}
六. 解析属性列表
attributes[]
Each value of the attributes table must be an attribute_info structure (§4.7).
The attributes defined by this specification as appearing in the attributes table of a ClassFile structure are listed in Table 4.7-C.
The rules concerning attributes defined to appear in the attributes table of a ClassFile structure are given in §4.7.
The rules concerning non-predefined attributes in the attributes table of a ClassFile structure are given in §4.7.1.
attribute_info {
u2 attribute_name_index;
u4 attribute_length;
u1 info[attribute_length];
}
属性列表是最复杂的部分,以下是全部的属性类型:

这里的属性有可能出现在很多地方,在Class、Method、Field后面都可能出现
我们这里就不处理全部的属性了,我们只解析 Code_attribute 这一最重要的属性作为示例
Code_attribute {
u2 attribute_name_index;
u4 attribute_length;
u2 max_stack;
u2 max_locals;
u4 code_length;
u1 code[code_length];
u2 exception_table_length;
{ u2 start_pc;
u2 end_pc;
u2 handler_pc;
u2 catch_type;
} exception_table[exception_table_length];
u2 attributes_count;
attribute_info attributes[attributes_count];
}
好,回到我们刚才空下来的Method Attribute,并将它补全:
// Walk the attributes of the current method; only the Code attribute is interpreted,
// every other attribute is skipped by its declared length.
for (int j = 0; j < method_attributesCount; j++) {
    int method_attributeNameIndex = dis.readUnsignedShort();
    String method_attributeName = constantPoolParser.getUtf8(method_attributeNameIndex);
    System.out.println("Method attribute name: " + method_attributeName);
    // attribute_length is a u4; widen it so values above Integer.MAX_VALUE do not turn negative.
    long method_attributeLength = Integer.toUnsignedLong(dis.readInt());
    System.out.println("Method attribute length: " + method_attributeLength);
    if (method_attributeName.equals("Code")) {
        int max_stack = dis.readUnsignedShort();
        System.out.println("Max stack: " + max_stack);
        int max_locals = dis.readUnsignedShort();
        System.out.println("Max locals: " + max_locals);
        int code_length = dis.readInt();
        System.out.println("Code length: " + code_length);
        // readFully either fills the whole array or throws EOFException, so the
        // print loop below can never run past a short read.
        byte[] code = new byte[code_length];
        dis.readFully(code);
        // Java bytes are signed; print each code byte as its unsigned value.
        for (int k = 0; k < code_length; k++) {
            System.out.print(Byte.toUnsignedInt(code[k]) + " ");
        }
        System.out.println();
        int exception_table_length = dis.readUnsignedShort();
        System.out.println("Exception table length: " + exception_table_length);
        for (int k = 0; k < exception_table_length; k++) {
            int start_pc = dis.readUnsignedShort();
            System.out.println("Start PC: " + start_pc);
            int end_pc = dis.readUnsignedShort();
            System.out.println("End PC: " + end_pc);
            int handler_pc = dis.readUnsignedShort();
            System.out.println("Handler PC: " + handler_pc);
            // catch_type is 0 for a handler that catches everything (e.g. finally);
            // otherwise it indexes a CONSTANT_Class entry, not a Utf8 entry.
            int catch_type = dis.readUnsignedShort();
            String catch_type_name = catch_type == 0
                    ? "any"
                    : constantPoolParser.getUtf8(((ConstantPoolParser.ClassInfo) constantPoolParser.constantPool.get(catch_type)).name_index);
            System.out.println("Catch type: " + catch_type_name);
        }
        // Attributes nested inside Code are not interpreted; skip each by its length.
        int attributes_count = dis.readUnsignedShort();
        System.out.println("Attributes count: " + attributes_count);
        for (int k = 0; k < attributes_count; k++) {
            int attributeNameIndex = dis.readUnsignedShort();
            String attributeName = constantPoolParser.getUtf8(attributeNameIndex);
            System.out.println("Attribute name: " + attributeName);
            long attributeLength = Integer.toUnsignedLong(dis.readInt());
            System.out.println("Attribute length: " + attributeLength);
            // skipNBytes skips exactly the requested count or throws; skipBytes may skip fewer.
            dis.skipNBytes(attributeLength);
            System.out.println("Skipped " + attributeLength + " bytes");
        }
    } else {
        // Not interpreted yet: skip the attribute body.
        dis.skipNBytes(method_attributeLength);
        System.out.println("Skipped " + method_attributeLength + " bytes");
    }
}
此时我们已经可以获取opcodes的列表了

这里的code部分就是opcodes和对应的操作数的字节流,不同的Opcodes后面可能会跟不同长度的操作数
具体可在文档中查阅:
为了更直观一点,我们再编写一个工具类,将opcodes数据转换为可读的字符串(由于Java的Opcodes类型很多,我这里并没有完整实现):
package utils;
import java.util.*;
/**
 * Turns the raw {@code code[]} bytes of a Code attribute into one text line per instruction,
 * formatted as {@code pc: mnemonic [operand]}.
 *
 * <p>Only a subset of the instruction set is known. An opcode without a table entry is printed
 * as two hex digits and treated as having no operands, so the output after an unsupported
 * instruction that does carry operands is not reliable.
 */
public class BytecodeDisassembler {
    private static final Map<Integer, String> OPCODES = new HashMap<>();

    static {
        // Constants: 0x01 is aconst_null, 0x02 is iconst_m1, 0x03..0x08 are iconst_0..iconst_5
        OPCODES.put(0x01, "aconst_null");
        OPCODES.put(0x02, "iconst_m1");
        for (int value = 0; value <= 5; value++) {
            OPCODES.put(0x03 + value, "iconst_" + value);
        }
        // Common loads
        OPCODES.put(0x2A, "aload_0");
        OPCODES.put(0x2B, "aload_1");
        OPCODES.put(0x1A, "iload_0");
        OPCODES.put(0x1B, "iload_1");
        OPCODES.put(0x1C, "iload_2");
        OPCODES.put(0x1D, "iload_3");
        // Stores
        OPCODES.put(0x3C, "istore_1");
        OPCODES.put(0x3D, "istore_2");
        OPCODES.put(0x4C, "astore_1");
        OPCODES.put(0x4D, "astore_2");
        // Constant pushes
        OPCODES.put(0x10, "bipush");
        OPCODES.put(0x11, "sipush");
        OPCODES.put(0x12, "ldc");
        OPCODES.put(0x13, "ldc_w");
        OPCODES.put(0x14, "ldc2_w");
        // Fields and methods
        OPCODES.put(0xB2, "getstatic");
        OPCODES.put(0xB3, "putstatic");
        OPCODES.put(0xB4, "getfield");
        OPCODES.put(0xB5, "putfield");
        OPCODES.put(0xB6, "invokevirtual");
        OPCODES.put(0xB7, "invokespecial");
        OPCODES.put(0xB8, "invokestatic");
        // Control flow
        OPCODES.put(0xA7, "goto");
        // Object operations
        OPCODES.put(0xBB, "new");
        OPCODES.put(0x59, "dup");
        // Returns
        OPCODES.put(0xB1, "return");
        OPCODES.put(0xAC, "ireturn");
        OPCODES.put(0xB0, "areturn");
    }

    /**
     * Disassembles a code array.
     *
     * @param code one array element per code byte; only the low 8 bits of each element are
     *             used, so both unsigned (0..255) and sign-extended byte values are accepted
     * @return one line per instruction, in code order
     * @throws ArrayIndexOutOfBoundsException if the last instruction's operand bytes are missing
     */
    public static List<String> disassemble(int[] code) {
        List<String> result = new ArrayList<>();
        int pc = 0;
        while (pc < code.length) {
            int opcode = u1(code, pc);
            String mnemonic = OPCODES.getOrDefault(opcode, String.format("%02X", opcode));
            String line;
            switch (opcode) {
                case 0x12 -> { // ldc: u1 constant-pool index
                    line = String.format("%d: %s #%d", pc, mnemonic, u1(code, pc + 1));
                    pc += 2;
                }
                // ldc_w, ldc2_w, get/putstatic, get/putfield, invokevirtual/special/static, new:
                // all take a u2 constant-pool index
                case 0x13, 0x14, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xBB -> {
                    line = String.format("%d: %s #%d", pc, mnemonic, u2(code, pc + 1));
                    pc += 3;
                }
                case 0x10 -> { // bipush: signed byte immediate
                    line = String.format("%d: %s %d", pc, mnemonic, (byte) code[pc + 1]);
                    pc += 2;
                }
                case 0x11 -> { // sipush: signed short immediate, so sign-extend the 16 bits
                    line = String.format("%d: %s %d", pc, mnemonic, (short) u2(code, pc + 1));
                    pc += 3;
                }
                case 0xA7 -> { // goto: signed 16-bit branch offset, printed as the raw offset
                    line = String.format("%d: %s %d", pc, mnemonic, (short) u2(code, pc + 1));
                    pc += 3;
                }
                default -> { // no operands (or an opcode this table does not know)
                    line = String.format("%d: %s", pc, mnemonic);
                    pc += 1;
                }
            }
            result.add(line);
        }
        return result;
    }

    /** Prints the lines produced by {@link #disassemble(int[])} to standard output. */
    public static void printDisassembly(int[] code) {
        for (String line : disassemble(code)) {
            System.out.println(line);
        }
    }

    /** Returns the element at {@code at} reduced to an unsigned byte value. */
    private static int u1(int[] code, int at) {
        return code[at] & 0xFF;
    }

    /** Returns the big-endian unsigned 16-bit value stored at {@code at} and {@code at + 1}. */
    private static int u2(int[] code, int at) {
        return (u1(code, at) << 8) | u1(code, at + 1);
    }
}

总结
编写这个ClassFile解析器本身没有什么难度,主要是手写一遍用于加深对ClassFile结构的学习记忆。
国内对于Class本身这样字节级别的分析很少,因此我也不得不参考官方JVM设计文档。
在实际的开发应用中,我们更多的是使用ASM框架进行字节码级别的操作,它的Visitor模式和Tree模式非常直观方便,自己编写字节码解析器的意义不大。
最后,希望本文可以给你提供一定的帮助!
参考资料:https://docs.oracle.com/javase/specs/jvms/se8/html https://www.bilibili.com/video/av541002913