服务器之家:专注于服务器技术及软件下载分享
分类导航

PHP教程|ASP.NET教程|Java教程|ASP教程|编程技术|正则表达式|C/C++|IOS|C#|Swift|Android|VB|R语言|JavaScript|易语言|vb.net|

服务器之家 - 编程语言 - Java教程 - 利用Java实现简单的词法分析器实例代码

利用Java实现简单的词法分析器实例代码

2020-07-17 12:11DuYue Java教程

众所周知,词法分析是编译原理中很重要的一个部分,其原理比较简单,不过网上的实现大部分都是用 C 语言或者 C++ 编写的。因为最近在学习 Java,故用 Java 语言实现了一个简单的词法分析器。感兴趣的朋友们可以参考借鉴,下面来一起看看吧。

首先看下我们要分析的代码段如下:

利用Java实现简单的词法分析器实例代码

输出结果如下:

利用Java实现简单的词法分析器实例代码

输出结果(a).PNG

利用Java实现简单的词法分析器实例代码

输出结果(b).PNG

利用Java实现简单的词法分析器实例代码

输出结果(c).PNG

括号里是一个二元式:(单词类别编码,单词位置编号)

代码如下:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
package Yue.LexicalAnalyzer;
 
import java.io.*;
 
/*
 * 主程序
 */
public class Main {
  public static void main(String[] args) throws IOException {
    Lexer lexer = new Lexer();
    lexer.printToken();
    lexer.printSymbolsTable();
  }
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
package Yue.LexicalAnalyzer;
 
import java.io.*;
import java.util.*;
 
/*
 * 词法分析并输出
 */
public class Lexer {
  /*记录行号*/
  public static int line = 1;
  /*存放最新读入的字符*/
  char character = ' ';
 
  /*保留字*/
  Hashtable<String, KeyWord> keywords = new Hashtable<String, KeyWord>();
  /*token序列*/
  private ArrayList<Token> tokens = new ArrayList<Token>();
  /*符号表*/
  private ArrayList<Symbol> symtable = new ArrayList<Symbol>();
 
  /*读取文件变量*/
  BufferedReader reader = null;
  /*保存当前是否读取到了文件的结尾*/
  private Boolean isEnd = false;
 
  /* 是否读取到文件的结尾 */
  public Boolean getReaderState() {
    return this.isEnd;
  }
 
  /*打印tokens序列*/
  public void printToken() throws IOException {
    FileWriter writer = new FileWriter("E:\\lex.txt");
    System.out.println("词法分析结果如下:");
    System.out.print("杜悦-2015220201031\r\n\n");
    writer.write("杜悦-2015220201031\r\n\r\n");
    while (getReaderState() == false) {
      Token tok = scan();
      String str = "line " + tok.line + "\t(" + tok.tag + "," + tok.pos + ")\t\t"
          + tok.name + ": " + tok.toString() + "\r\n";
      writer.write(str);
      System.out.print(str);
    }
    writer.flush();
 
  }
 
  /*打印符号表*/
  public void printSymbolsTable() throws IOException {
    FileWriter writer = new FileWriter("E:\\symtab1.txt");
    System.out.print("\r\n\r\n符号表\r\n");
    System.out.print("编号\t行号\t名称\r\n");
    writer.write("符号表\r\n");
    writer.write("编号 " + "\t行号 " + "\t名称 \r\n");
    Iterator<Symbol> e = symtable.iterator();
    while (e.hasNext()) {
      Symbol symbol = e.next();
      String desc = symbol.pos + "\t" + symbol.line + "\t" + symbol.toString();
      System.out.print(desc + "\r\n");
      writer.write(desc + "\r\n");
    }
 
    writer.flush();
  }
 
  /*打印错误*/
  public void printError(Token tok) throws IOException{
    FileWriter writer = new FileWriter("E:\\error.txt");
    System.out.print("\r\n\r\n错误词法如下:\r\n");
    writer.write("错误词法如下:\r\n");
    String str = "line " + tok.line + "\t(" + tok.tag + "," + tok.pos + ")\t\t"
        + tok.name + ": " + tok.toString() + "\r\n";
    writer.write(str);
  }
 
  /*添加保留字*/
  void reserve(KeyWord w) {
    keywords.put(w.lexme, w);
  }
 
  public Lexer() {
    /*初始化读取文件变量*/
    try {
      reader = new BufferedReader(new FileReader("E:\\输入.txt"));
    } catch (IOException e) {
      System.out.print(e);
    }
 
    /*添加保留字*/
    this.reserve(KeyWord.begin);
    this.reserve(KeyWord.end);
    this.reserve(KeyWord.integer);
    this.reserve(KeyWord.function);
    this.reserve(KeyWord.read);
    this.reserve(KeyWord.write);
    this.reserve(KeyWord.aIf);
    this.reserve(KeyWord.aThen);
    this.reserve(KeyWord.aElse);
  }
 
  /*按字符读*/
  public void readch() throws IOException {
    character = (char) reader.read();
    if ((int) character == 0xffff) {
      this.isEnd = true;
    }
  }
 
  /*判断是否匹配*/
  public Boolean readch(char ch) throws IOException {
    readch();
    if (this.character != ch) {
      return false;
    }
 
    this.character = ' ';
    return true;
  }
 
  /*数字的识别*/
  public Boolean isDigit() throws IOException {
    if (Character.isDigit(character)) {
      int value = 0;
      while (Character.isDigit(character)) {
        value = 10 * value + Character.digit(character, 10);
        readch();
      }
 
      Num n = new Num(value);
      n.line = line;
      tokens.add(n);
      return true;
    } else
      return false;
  }
 
  /*保留字、标识符的识别*/
  public Boolean isLetter() throws IOException {
    if (Character.isLetter(character)) {
      StringBuffer sb = new StringBuffer();
 
      /*首先得到整个的一个分割*/
      while (Character.isLetterOrDigit(character)) {
        sb.append(character);
        readch();
      }
 
      /*判断是保留字还是标识符*/
      String s = sb.toString();
      KeyWord w = keywords.get(s);
 
      /*如果是保留字的话,w不应该是空的*/
      if (w != null) {
        w.line = line;
        tokens.add(w);
      } else {
        /*否则就是标识符,此处多出记录标识符编号的语句*/
        Symbol sy = new Symbol(s);
        Symbol mark = sy;      //用于标记已存在标识符
        Boolean isRepeat = false;
        sy.line = line;
        for (Symbol i : symtable) {
          if (sy.toString().equals(i.toString())) {
            mark = i;
            isRepeat = true;
          }
        }
        if (!isRepeat) {
          sy.pos = symtable.size() + 1;
          symtable.add(sy);
        } else if (isRepeat) {
          sy.pos = mark.pos;
        }
        tokens.add(sy);
      }
      return true;
    } else
      return false;
  }
 
  /*符号的识别*/
  public Boolean isSign() throws IOException {
    switch (character) {
      case '#':
        readch();
        AllEnd.allEnd.line = line;
        tokens.add(AllEnd.allEnd);
        return true;
      case '\r':
        if (readch('\n')) {
          readch();
          LineEnd.lineEnd.line = line;
          tokens.add(LineEnd.lineEnd);
          line++;
          return true;
        }
      case '(':
        readch();
        Delimiter.lpar.line = line;
        tokens.add(Delimiter.lpar);
        return true;
      case ')':
        readch();
        Delimiter.rpar.line = line;
        tokens.add(Delimiter.rpar);
        return true;
      case ';':
        readch();
        Delimiter.sem.line = line;
        tokens.add(Delimiter.sem);
        return true;
      case '+':
        readch();
        CalcWord.add.line = line;
        tokens.add(CalcWord.add);
        return true;
      case '-':
        readch();
        CalcWord.sub.line = line;
        tokens.add(CalcWord.sub);
        return true;
      case '*':
        readch();
        CalcWord.mul.line = line;
        tokens.add(CalcWord.mul);
        return true;
      case '/':
        readch();
        CalcWord.div.line = line;
        tokens.add(CalcWord.div);
        return true;
      case ':':
        if (readch('=')) {
          readch();
          CalcWord.assign.line = line;
          tokens.add(CalcWord.assign);
          return true;
        }
        break;
      case '>':
        if (readch('=')) {
          readch();
          CalcWord.ge.line = line;
          tokens.add(CalcWord.ge);
          return true;
        }
        break;
      case '<':
        if (readch('=')) {
          readch();
          CalcWord.le.line = line;
          tokens.add(CalcWord.le);
          return true;
        }
        break;
      case '!':
        if (readch('=')) {
          readch();
          CalcWord.ne.line = line;
          tokens.add(CalcWord.ne);
          return true;
        }
        break;
    }
    return false;
  }
 
 
  /*下面开始分割关键字,标识符等信息*/
  public Token scan() throws IOException {
    Token tok;
    while (character == ' ')
      readch();
    if (isDigit() || isSign() || isLetter()) {
      tok = tokens.get(tokens.size() - 1);
    } else {
      tok = new Token(character);
      printError(tok);
    }
    return tok;
  }
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
package Yue.LexicalAnalyzer;
 
/**
 * Base class of all tokens produced by the lexer.
 */
public class Token {
  /** Position number; 0 unless something assigns one. */
  public int pos = 0;
  /** Human-readable category name; empty for a plain token. */
  public String name = "";
  /** Line on which the token was found; defaults to 1. */
  public int line = 1;
  /** Category code of the token (for a plain token, the character code). */
  public final int tag;

  /**
   * Creates a token with the given category code.
   *
   * @param t the category code
   */
  public Token(int t) {
    tag = t;
  }

  /**
   * Returns the tag interpreted as a single character.
   *
   * @return a one-character string
   */
  @Override
  public String toString() {
    return String.valueOf((char) tag);
  }
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
package Yue.LexicalAnalyzer;
 
/**
 * Category codes of the tokens.
 */
public class Tag {
  // Reserved words
  public static final int BEGIN = 1;
  public static final int END = 2;
  public static final int INTEGER = 3;
  public static final int FUNCTION = 4;
  public static final int READ = 5;
  public static final int WRITE = 6;
  public static final int IF = 7;
  public static final int THEN = 8;
  public static final int ELSE = 9;

  // Identifier and numeric constant
  public static final int SYMBOL = 11;
  public static final int CONSTANT = 12;

  // Operators: + - * /
  public static final int ADD = 13;
  public static final int SUB = 14;
  public static final int MUL = 15;
  public static final int DIV = 16;

  // Operators: <= >= != :=
  public static final int LE = 18;
  public static final int GE = 19;
  public static final int NE = 20;
  public static final int ASSIGN = 23;

  // Delimiters: ( ) ;
  public static final int LPAR = 24;
  public static final int RPAR = 25;
  public static final int SEM = 26;

  // End of line, and the end marker "#"
  public static final int LINE_END = 27;
  public static final int ALL_END = 28;
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
package Yue.LexicalAnalyzer;
 
/**
 * 保留字
 */
public class KeyWord extends Token {
  public String lexme = "";
 
  public KeyWord(String s, int t) {
    super(t);
    this.lexme = s;
    this.name = "保留字";
  }
 
  public String toString() {
    return this.lexme;
  }
 
  public static final KeyWord
      begin = new KeyWord("begin", Tag.BEGIN),
      end = new KeyWord("end", Tag.END),
      integer = new KeyWord("integer", Tag.INTEGER),
      function = new KeyWord("function", Tag.FUNCTION),
      read = new KeyWord("read", Tag.READ),
      write = new KeyWord("write", Tag.WRITE),
      aIf = new KeyWord("if", Tag.IF),
      aThen = new KeyWord("then", Tag.THEN),
      aElse = new KeyWord("else", Tag.ELSE);
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
package Yue.LexicalAnalyzer;
 
/*
 * 标识符
 */
public class Symbol extends Token {
  public String lexme = "";
 
  public Symbol(String s) {
    super(Tag.SYMBOL);
    this.lexme = s;
    this.name = "标识符";
  }
 
  public String toString() {
    return this.lexme;
  }
 
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
package Yue.LexicalAnalyzer;
 
/**
 * 运算符
 */
public class CalcWord extends Token {
  public String lexme = "";
 
  public CalcWord(String s, int t) {
    super(t);
    this.lexme = s;
    this.name = "运算符";
  }
 
  public String toString() {
    return this.lexme;
  }
 
  public static final CalcWord
      add = new CalcWord("+", Tag.ADD),
      sub = new CalcWord("-", Tag.SUB),
      mul = new CalcWord("*", Tag.MUL),
      div = new CalcWord("/", Tag.DIV),
      le = new CalcWord("<=", Tag.LE),
      ge = new CalcWord(">=", Tag.GE),
      ne = new CalcWord("!=", Tag.NE),
      assign = new CalcWord(":=", Tag.ASSIGN);
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
package Yue.LexicalAnalyzer;
 
/**
 * 界符
 */
public class Delimiter extends Token {
  public String lexme = "";
 
  public Delimiter(String s, int t) {
    super(t);
    this.lexme = s;
    this.name = "界符";
  }
 
  public String toString() {
    return this.lexme;
  }
 
  public static final Delimiter
      lpar = new Delimiter("(", Tag.LPAR),
      rpar = new Delimiter(")", Tag.RPAR),
      sem = new Delimiter(";", Tag.SEM);
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
package Yue.LexicalAnalyzer;
 
/*
 * 常数
 */
public class Num extends Token {
  public final int value;
 
  public Num(int v) {
    super(Tag.CONSTANT);
    this.value = v;
    this.name = "常数";
  }
 
  public String toString() {
    return "" + value;
  }
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
package Yue.LexicalAnalyzer;
 
/**
 * 行尾符
 */
public class LineEnd extends Token {
  public String lexme = "";
 
  public LineEnd(String s) {
    super(Tag.LINE_END);
    this.lexme = s;
    this.name = "行尾符";
  }
 
  public String toString() {
    return this.lexme;
  }
 
  public static final LineEnd lineEnd = new LineEnd("\r\n");
}
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
package Yue.LexicalAnalyzer;
 
/**
 * 结尾符
 */
public class AllEnd extends Token {
  public String lexme = "";
 
  public AllEnd(String s) {
    super(Tag.ALL_END);
    this.lexme = s;
    this.name = "结尾符";
  }
 
  public String toString() {
    return this.lexme;
  }
 
  public static final AllEnd allEnd = new AllEnd("#");
}

总结

以上就是这篇文章的全部内容了,希望本文的内容对大家的学习或者工作能带来一定的帮助,如果有疑问大家可以留言交流。

原文链接:http://www.jianshu.com/p/209f1fb6a827

延伸 · 阅读

精彩推荐