服务器之家:专注于服务器技术及软件下载分享
分类导航

PHP教程|ASP.NET教程|Java教程|ASP教程|编程技术|正则表达式|C/C++|IOS|C#|Swift|Android|VB|R语言|JavaScript|易语言|vb.net|

服务器之家 - 编程语言 - Java教程 - Java实现lucene搜索功能的方法(推荐)

Java实现lucene搜索功能的方法(推荐)

2020-07-20 13:47jingxian Java教程

下面小编就为大家带来一篇Java实现lucene搜索功能的方法(推荐)。小编觉得挺不错的,现在就分享给大家,也给大家做个参考。一起跟随小编过来看看吧。

直接上代码:

IncrementIndex.java(建立增量索引):
package com.sand.mpa.sousuo;
 
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
 
//增量索引
/*
* 实现思路:首次查询数据库表所有记录,对每条记录建立索引,并将最后一条记录的id存储到storeId.txt文件中
*   当新插入一条记录时,再建立索引时不必再对所有数据重新建一遍索引,
*   可根据存放在storeId.txt文件中的id查出新插入的数据,只对新增的数据新建索引,并把新增的索引追加到原来的索引文件中
* */
public class IncrementIndex {
 
 public static void main(String[] args) {
  try {
   IncrementIndex index = new IncrementIndex();
   String path = "E:\\Test\\lucene_test\\poiIdext";//索引文件的存放路径
   String storeIdPath = "E:\\Test\\lucene_test\\storeId.txt";//存储ID的路径
   String storeId = "";
   Date date1 = new Date();
   storeId = index.getStoreId(storeIdPath);
   ResultSet rs = index.getResult(storeId);
   System.out.println("开始建立索引。。。。");
   index.indexBuilding(path, storeIdPath, rs);
   Date date2 = new Date();
   System.out.println("耗时:"+(date2.getTime()-date1.getTime())+"ms");
   storeId = index.getStoreId(storeIdPath);
   System.out.println(storeId);//打印出这次存储起来的ID
  } catch (Exception e) {
   e.printStackTrace();
  }
 }
 
 public static void buildIndex(String indexFile, String storeIdFile) {
  try {
     
   String path = indexFile;//索引文件的存放路径
   String storeIdPath = storeIdFile;//存储ID的路径
   String storeId = "";
   storeId = getStoreId(storeIdPath);
   ResultSet rs = getResult(storeId);
   indexBuilding(path, storeIdPath, rs);
   storeId = getStoreId(storeIdPath);
 
  } catch (Exception e) {
   e.printStackTrace();
  }
 }
 
 public static ResultSet getResult(String storeId) throws Exception {
  Class.forName("com.mysql.jdbc.Driver").newInstance();
  String url = "jdbc:mysql://localhost:3306/1pm2_v1";
  String userName = "root";
  String password = "root";
  Connection conn = DriverManager.getConnection(url, userName, password);
  Statement stmt = conn.createStatement();
  String sql = "select * from pd_ugc";
  ResultSet rs = stmt.executeQuery(sql + " where id > '" + storeId + "'order by id");
  return rs;
 }
 
 public static boolean indexBuilding(String path, String storeIdPath, ResultSet rs) {
  try {
   Analyzer luceneAnalyzer = new StandardAnalyzer();
   // 取得存储起来的ID,以判定是增量索引还是重新索引
   boolean isEmpty = true;
   try {
    File file = new File(storeIdPath);
    if (!file.exists()) {
     file.createNewFile();
    }
    FileReader fr = new FileReader(storeIdPath);
    BufferedReader br = new BufferedReader(fr);
    if (br.readLine() != null) {
     isEmpty = false;
    }
    br.close();
    fr.close();
   } catch (IOException e) {
    e.printStackTrace();
   }
   //isEmpty=false表示增量索引
   IndexWriter writer = new IndexWriter(path, luceneAnalyzer, isEmpty);
   String storeId = "";
   boolean indexFlag = false;
   String id;
   String name;
   String address;
   String citycode;
   while (rs.next()) {
    id = rs.getInt("id") + "";
    name = rs.getString("name");
    address = rs.getString("address");
    citycode = rs.getString("citycode");
    writer.addDocument(Document(id, name, address, citycode));
    storeId = id;//将拿到的id给storeId,这种拿法不合理,这里为了方便
    indexFlag = true;
   }
   writer.optimize();
   writer.close();
   if (indexFlag) {
    // 将最后一个的ID存到磁盘文件中
    writeStoreId(storeIdPath, storeId);
   }
   return true;
  } catch (Exception e) {
   e.printStackTrace();
   System.out.println("出错了" + e.getClass() + "\n 错误信息为: " + e.getMessage());
   return false;
  }
 
 }
 
 public static Document Document(String id, String name, String address, String citycode) {
  Document doc = new Document();
  doc.add(new Field("id", id, Field.Store.YES, Field.Index.TOKENIZED));
  doc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));//查询字段
  doc.add(new Field("address", address, Field.Store.YES, Field.Index.TOKENIZED));
  doc.add(new Field("citycode", citycode, Field.Store.YES, Field.Index.TOKENIZED));//查询字段
  return doc;
 }
 
 // 取得存储在磁盘中的ID
 public static String getStoreId(String path) {
  String storeId = "";
  try {
   File file = new File(path);
   if (!file.exists()) {
    file.createNewFile();
   }
   FileReader fr = new FileReader(path);
   BufferedReader br = new BufferedReader(fr);
   storeId = br.readLine();
   if (storeId == null || storeId == "") storeId = "0";
   br.close();
   fr.close();
  } catch (Exception e) {
   e.printStackTrace();
  }
  return storeId;
 }
 
 // 将ID写入到磁盘文件中
 public static boolean writeStoreId(String path, String storeId) {
  boolean b = false;
  try {
   File file = new File(path);
   if (!file.exists()) {
    file.createNewFile();
   }
   FileWriter fw = new FileWriter(path);
   PrintWriter out = new PrintWriter(fw);
   out.write(storeId);
   out.close();
   fw.close();
   b = true;
  } catch (IOException e) {
   e.printStackTrace();
  }
  return b;
 }
}
IndexResult.java(查询结果对象):
package com.sand.mpa.sousuo;
 
/**
 * Plain value holder with four string properties: id, name, address and citycode.
 * Every property starts out as {@code null} and is read and written through its
 * getter/setter pair without any validation.
 */
public class IndexResult {

 private String id;
 private String name;
 private String address;
 private String citycode;

 /** @return the id, or {@code null} if none has been set */
 public String getId() {
  return this.id;
 }

 /** @param id the new id; stored as given */
 public void setId(String id) {
  this.id = id;
 }

 /** @return the name, or {@code null} if none has been set */
 public String getName() {
  return this.name;
 }

 /** @param name the new name; stored as given */
 public void setName(String name) {
  this.name = name;
 }

 /** @return the address, or {@code null} if none has been set */
 public String getAddress() {
  return this.address;
 }

 /** @param address the new address; stored as given */
 public void setAddress(String address) {
  this.address = address;
 }

 /** @return the city code, or {@code null} if none has been set */
 public String getCitycode() {
  return this.citycode;
 }

 /** @param citycode the new city code; stored as given */
 public void setCitycode(String citycode) {
  this.citycode = citycode;
 }

}
IndexUtils.java(索引与查询工具类):
package com.sand.mpa.sousuo;
 
 
 
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
 
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.mira.lucene.analysis.IK_CAnalyzer;
 
public class IndexUtils {
 
 //0. 创建增量索引
 public static void buildIndex(String indexFile, String storeIdFile) {
  IncrementIndex.buildIndex(indexFile, storeIdFile);
 }
 
 //1. 单字段查询
 @SuppressWarnings("deprecation")
 public static List<IndexResult> queryByOneKey(IndexSearcher indexSearcher, String field,
   String key) {
  try {
   Date date1 = new Date();
   QueryParser queryParser = new QueryParser(field, new StandardAnalyzer());
   Query query = queryParser.parse(key);
   Hits hits = indexSearcher.search(query);
   Date date2 = new Date();
   System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms");
   List<IndexResult> list = new ArrayList<IndexResult>();
   for (int i = 0; i < hits.length(); i++) {
    list.add(getIndexResult(hits.doc(i)));
   }
   return list;
  } catch (ParseException e) {
   e.printStackTrace();
  } catch (IOException e) {
   e.printStackTrace();
  }
  return null;
 }
 
 //2. 多条件查询。这里实现的是and操作
 //注:要查询的字段必须是index的
 //即doc.add(new Field("pid", rs.getString("pid"), Field.Store.YES,Field.Index.TOKENIZED));
 @SuppressWarnings("deprecation")
 public static List<IndexResult> queryByMultiKeys(IndexSearcher indexSearcher, String[] fields,
   String[] keys) {
 
  try {
   BooleanQuery m_BooleanQuery = new BooleanQuery();
   if (keys != null && keys.length > 0) {
    for (int i = 0; i < keys.length; i++) {
     QueryParser queryParser = new QueryParser(fields[i], new StandardAnalyzer());
     Query query = queryParser.parse(keys[i]);
     m_BooleanQuery.add(query, BooleanClause.Occur.MUST);//and操作
    }
    Hits hits = indexSearcher.search(m_BooleanQuery);
    List<IndexResult> list = new ArrayList<IndexResult>();
    for (int i = 0; i < hits.length(); i++) {
     list.add(getIndexResult(hits.doc(i)));
    }
    return list;
   }
  } catch (ParseException e) {
   e.printStackTrace();
  } catch (IOException e) {
   e.printStackTrace();
  }
  return null;
 }
 
 //3.高亮显示 实现了单条件查询
 //可改造为多条件查询
 public static List<IndexResult> highlight(IndexSearcher indexSearcher, String key) throws InvalidTokenOffsetsException {
  try {
   QueryParser queryParser = new QueryParser("name", new StandardAnalyzer());
   Query query = queryParser.parse(key);
   TopDocCollector collector = new TopDocCollector(800);
   indexSearcher.search(query, collector);
   ScoreDoc[] hits = collector.topDocs().scoreDocs;
 
   Highlighter highlighter = null;
   SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>",
     "</font>");
   highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
   highlighter.setTextFragmenter(new SimpleFragmenter(200));
   List<IndexResult> list = new ArrayList<IndexResult>();
   Document doc;
   for (int i = 0; i < hits.length; i++) {
    //System.out.println(hits[i].score);
    doc = indexSearcher.doc(hits[i].doc);
    TokenStream tokenStream = new StandardAnalyzer().tokenStream("name",
      new StringReader(doc.get("name")));
    IndexResult ir = getIndexResult(doc);
    ir.setName(highlighter.getBestFragment(tokenStream, doc.get("name")));
    list.add(ir);
   }
   return list;
  } catch (ParseException e) {
   e.printStackTrace();
  } catch (IOException e) {
   e.printStackTrace();
  }
  return null;
 
 }
 
 //4. 多字段查询
 @SuppressWarnings("deprecation")
 public static List<IndexResult> queryByMultiFileds(IndexSearcher indexSearcher,
   String[] fields, String key) {
  try {
   MultiFieldQueryParser mfq = new MultiFieldQueryParser(fields, new StandardAnalyzer());
   Query query = mfq.parse(key);
   Hits hits = indexSearcher.search(query);
   List<IndexResult> list = new ArrayList<IndexResult>();
   for (int i = 0; i < hits.length(); i++) {
    list.add(getIndexResult(hits.doc(i)));
   }
 
   return list;
  } catch (ParseException e) {
   e.printStackTrace();
  } catch (IOException e) {
   e.printStackTrace();
  }
  return null;
 }
 
 //5. 删除索引
 public static void deleteIndex(String indexFile, String id) throws CorruptIndexException,
   IOException {
  IndexReader indexReader = IndexReader.open(indexFile);
  indexReader.deleteDocuments(new Term("id", id));
  indexReader.close();
 }
 
 //6. 一元分词
 @SuppressWarnings("deprecation")
 public static String Standard_Analyzer(String str) {
  Analyzer analyzer = new StandardAnalyzer();
  Reader r = new StringReader(str);
  StopFilter sf = (StopFilter) analyzer.tokenStream("", r);
  System.out.println("=====StandardAnalyzer====");
  System.out.println("分析方法:默认没有词只有字(一元分词)");
  Token t;
  String results = "";
  try {
   while ((t = sf.next()) != null) {
    System.out.println(t.termText());
    results = results + " " + t.termText();
   }
  } catch (IOException e) {
   e.printStackTrace();
  }
  return results;
 }
 
 //7. 字典分词
 @SuppressWarnings("deprecation")
 public static String ik_CAnalyzer(String str) {
  Analyzer analyzer = new IK_CAnalyzer();
  Reader r = new StringReader(str);
  TokenStream ts = (TokenStream) analyzer.tokenStream("", r);
  System.out.println("=====IK_CAnalyzer====");
  System.out.println("分析方法:字典分词,正反双向搜索");
  Token t;
  String results = "";
  try {
   while ((t = ts.next()) != null) {
    System.out.println(t.termText());
    results = results + " " + t.termText();
   }
  } catch (IOException e) {
   e.printStackTrace();
  }
  System.out.println(results);
  return results;
 }
 
 //在结果中搜索
 public static void queryFromResults() {
 
 }
 
 //组装对象
 public static IndexResult getIndexResult(Document doc) {
  IndexResult ir = new IndexResult();
  ir.setId(doc.get("id"));
  ir.setName(doc.get("name"));
  ir.setAddress(doc.get("address"));
  ir.setCitycode(doc.get("citycode"));
  return ir;
 }
}
Test.java(测试类):
package com.sand.mpa.sousuo;
 
 
/**
 * $Id$
 * Copyright 2009-2010 Oak Pacific Interactive. All rights reserved.
 */
 
 
import java.util.Date;
import java.util.List;
 
import org.apache.lucene.search.IndexSearcher;
import org.apache.velocity.runtime.directive.Break;
 
public class Test {
 
 //存放索引文件
 private static String indexFile = "E:\\Test\\lucene_test\\poiIdext";
 
 //存放id
 private static String storeIdFile = "E:\\Test\\lucene_test\\storeId.txt";
 
 public static void main(String[] args) throws Exception {
  //0. 创建增量索引
    
  IndexUtils.buildIndex(indexFile, storeIdFile);
  
  
  IndexSearcher indexSearcher = new IndexSearcher(indexFile);
 
  List<IndexResult> list =null;
//  String key ="";
  Date date1 = new Date();
  Date date2 =new Date();
 
  
  //1.单字段查询
//  key = IndexUtils.ik_CAnalyzer("上海");
//  list = IndexUtils.queryByOneKey(indexSearcher, "address", key);
//  date2 = new Date();
//  System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
//    + "条=======================================单字段查询");
//  printResults(list);
//  System.exit(1);
  
  //2.多条件查询
  String[] fields = { "name", "address" };
  String[] keys = { IndexUtils.ik_CAnalyzer("永城市")};
  date1 = new Date();
  list = IndexUtils.queryByMultiKeys(indexSearcher, fields, keys);
  date2 = new Date();
  System.out.println("查询耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
    + "条\n===============================多条件查询");
  printResults(list);
  System.exit(1);
  //3.高亮显示 单字段查询
//  System.out.println("\n\n");
//  date1 = new Date();
//  key="安徽";
//  list = IndexUtils.highlight(indexSearcher, key);
//  date2 = new Date();
//  System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
//    + "条\n======================================高亮显示");
//  printResults(list);
//  System.exit(1);
  //4. 多字段查询
//  date1 = new Date();
//  list = IndexUtils.queryByMultiFileds(indexSearcher, fields, key);
//  date2 = new Date();
//  System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
//    + "条\n=====================================多字段查询");
  // printResults(list);
 
  //5. 删除索引中的字段 根据id进行删除
//  IndexUtils.deleteIndex(indexFile, "1552");
 }
 
 //打印结果
 public static void printResults(List<IndexResult> list) {
  if (list != null && list.size() > 0) {
   for (int i = 0; i < list.size(); i++) {
    System.out.println(list.get(i).getId() + "," + list.get(i).getName() + ","
      + list.get(i).getAddress() + "," + list.get(i).getCitycode()+"--->"+i);
   }
  }
 }
}
 
 
 

以上就是小编为大家带来的Java实现lucene搜索功能的方法(推荐)全部内容了,希望大家多多支持服务器之家~

延伸 · 阅读

精彩推荐