Lucene之——搜索实例
生活随笔
收集整理的這篇文章主要介紹了
Lucene之——搜索实例
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
轉載請注明出處:http://blog.csdn.net/l1028386804/article/details/49287663
需求是一個搜索功能:要求將所有包含搜索關鍵字的文章的標題列出來(文章的內容存儲在Oracle的CLOB字段中),也就是要用Lucene實現對數據庫的大字段進行索引和搜索。Lucene索引的創建通過Java定時任務來完成。
定時調用建立索引方法
package com.qqw.index;

import java.util.Timer;

/**
 * Command-line entry point that schedules the index-building task.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class IndexerServer {

    /**
     * Registers the configuration file name, then schedules the singleton
     * {@link LuceneDBIndexerTask} to start immediately and repeat at a fixed
     * rate of {@code Constant.CREATE_INDEX_SLEEP_TIME} milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String propFile = "directory.properties";
        Config.setConfigFileName(propFile);
        Timer timer = new Timer();
        LuceneDBIndexerTask luceneTask = LuceneDBIndexerTask.getInstance();
        timer.scheduleAtFixedRate(luceneTask, 0,
                DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME));
    }
}
建立索引的核心實現
package com.qqw.index;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.TimerTask;
import oracle.sql.CLOB;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Timer task that rebuilds the Lucene index from the rows returned by
 * {@code Constant.DB_QUERY_STRING}.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class LuceneDBIndexerTask extends TimerTask {

    /** Fallback parent directory used when no directory is configured. */
    private static final String DEFAULT_INDEX_DIR = "C:\\IndexDB";

    /** Number of timestamped directories kept under the parent directory. */
    private static final int MAX_KEPT_DIRECTORIES = 3;

    /** Parent directory of the timestamped per-run directories. */
    private File parentDir = null;

    /** The single shared instance handed out by {@link #getInstance()}. */
    private static LuceneDBIndexerTask index = new LuceneDBIndexerTask();

    /**
     * Resolves the parent directory from {@code Constant.INDEX_STORE_DIRECTORY}
     * (falling back to {@link #DEFAULT_INDEX_DIR}) and creates it if missing.
     */
    private LuceneDBIndexerTask() {
        String dirStr = Constant.INDEX_STORE_DIRECTORY;
        if (dirStr != null && !"".equals(dirStr)) {
            this.parentDir = new File(dirStr);
        } else {
            this.parentDir = new File(DEFAULT_INDEX_DIR);
        }
        if (!this.parentDir.exists()) {
            // mkdirs(): the configured path may have several missing levels.
            this.parentDir.mkdirs();
        }
    }

    /**
     * Returns the shared task instance.
     *
     * @return the singleton task
     */
    public static LuceneDBIndexerTask getInstance() {
        return index;
    }

    /**
     * Queries the database and writes one Lucene document per row.
     *
     * <p>All checked failures are printed and swallowed. On failure the writer
     * is rolled back so the index write lock is released for the next run.
     */
    @Override
    public void run() {
        System.out.println("====LuceneDBIndexerTask$run()===============");
        System.out.println("~~~開始建立索引文件~~~~~~~~~~~~~~~");
        Connection conn = null;
        Statement stmt = null;
        ResultSet rs = null;
        IndexWriter writer = null;
        // NOTE(review): the index is written to this hard-coded directory, not to
        // the timestamped directory created below whose path is reported in the
        // success message. Left unchanged because the search side may rely on
        // this location - confirm the intended target.
        String filedir = "d:\\fileIndex\\blogs";
        File indexDir = new File(filedir);
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_33, analyzer);
        conf.setOpenMode(OpenMode.CREATE);
        try {
            Class.forName(Constant.DB_DRIVER_STRING).newInstance();
            conn = DriverManager.getConnection(Constant.DB_URI_STRING,
                    Constant.DB_USERNAME, Constant.DB_PWD);
            stmt = conn.createStatement();
            rs = stmt.executeQuery(Constant.DB_QUERY_STRING);
            File file = new File(parentDir + File.separator
                    + new SimpleDateFormat("yyyyMMddHHmmss").format(new Date())
                    + File.separator);
            if (!file.exists()) {
                file.mkdirs();
            }
            writer = new IndexWriter(FSDirectory.open(indexDir), conf);
            long startTime = new Date().getTime();
            while (rs.next()) {
                writer.addDocument(buildDocument(rs));
            }
            writer.optimize();
            writer.close();
            // Closed successfully: nothing left to roll back in finally.
            writer = null;
            // Report how long indexing took.
            long endTime = new Date().getTime();
            System.out.println("索引文件" + file.getPath() + "建立成功...");
            System.out.println("這花費了" + (endTime - startTime) + " 毫秒來把文檔增加到索引里面去!");
            // Keep only the newest directories under the parent directory.
            checkFiles(parentDir);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } finally {
            rollbackQuietly(writer);
            closeJdbc(rs, stmt, conn);
        }
    }

    /** Builds the Lucene document for the current row of {@code rs}. */
    private Document buildDocument(ResultSet rs) throws SQLException, IOException {
        Document doc = new Document();
        doc.add(new Field("ARTICLEID", rs.getString("ARTICLEID"), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("TITLE", rs.getString("TITLE"), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("USERNAME", rs.getString("USERNAME"), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("USERID", rs.getString("USERID"), Field.Store.YES, Field.Index.ANALYZED));
        // The creation date is indexed as a yyyy-MM-dd string.
        String createdate = new SimpleDateFormat("yyyy-MM-dd").format(rs.getTimestamp("CREATEDATE"));
        doc.add(new Field("CREATEDATE", createdate, Field.Store.YES, Field.Index.ANALYZED));
        // The large CLOB column is read fully into memory before indexing.
        String content = readClob((CLOB) rs.getClob("CONTENT"));
        doc.add(new Field("CONTENT", content, Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }

    /** Reads a CLOB into a string; a null CLOB yields the empty string. */
    private String readClob(CLOB clob) throws SQLException, IOException {
        if (clob == null) {
            return "";
        }
        BufferedReader in = new BufferedReader(clob.getCharacterStream());
        try {
            StringWriter out = new StringWriter();
            int c;
            while ((c = in.read()) != -1) {
                out.write(c);
            }
            return out.toString();
        } finally {
            in.close();
        }
    }

    /** Rolls back a writer that was not closed normally; no-op for null. */
    private void rollbackQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.rollback();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes each JDBC resource independently so one failure cannot leak the others. */
    private void closeJdbc(ResultSet rs, Statement stmt, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Deletes the entries of {@code dir} that sort first by path until at most
     * three remain. The directories created by {@link #run()} are named with a
     * {@code yyyyMMddHHmmss} timestamp, so the first entries are the oldest.
     *
     * @param dir directory whose children are pruned; ignored if it cannot be listed
     */
    public void checkFiles(File dir) {
        File[] files = dir.listFiles();
        if (files == null) {
            return;
        }
        // Sort the File array itself so the entry deleted is really the first one.
        Arrays.sort(files);
        int excess = files.length - MAX_KEPT_DIRECTORIES;
        for (int i = 0; i < excess; i++) {
            deleteDirectory(files[i]);
        }
    }

    /**
     * Recursively deletes a directory and everything below it.
     *
     * @param path file or directory to delete
     * @return {@code true} if {@code path} itself was deleted
     */
    public boolean deleteDirectory(File path) {
        // listFiles() returns null for a plain file or a missing path.
        File[] files = path.listFiles();
        if (files != null) {
            for (int i = 0; i < files.length; i++) {
                if (files[i].isDirectory()) {
                    deleteDirectory(files[i]);
                } else {
                    files[i].delete();
                }
            }
        }
        boolean hasdelete = path.delete();
        if (hasdelete) {
            System.out.println("刪除索引目錄" + path);
        }
        return hasdelete;
    }

    /** Runs a single indexing pass from the command line. */
    public static void main(String[] args) {
        new LuceneDBIndexerTask().run();
    }
}
配置文件管理類:
package com.qqw.index;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Lazily loaded, process-wide access to the properties configuration file.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class Config {

    /** Resource loaded when no file name was set via {@link #setConfigFileName(String)}. */
    private static final String DEFAULT_CONFIG_FILE_NAME = "directory.properties";

    private static Config cfg = null;

    private static String configFileName = null;

    private Properties props;

    public Config() {
        props = new java.util.Properties();
    }

    /**
     * Returns the shared instance, loading the configuration on first use.
     *
     * @return the singleton configuration
     */
    public synchronized static Config getInstance() {
        if (cfg == null) {
            cfg = new Config();
            cfg.loadConfig();
        }
        return cfg;
    }

    /**
     * Loads the configured classpath resource into {@link #props}.
     *
     * @return 1 if the properties were loaded, 0 otherwise
     */
    private int loadConfig() {
        // Fall back to the default name: callers such as a servlet listener may
        // never call setConfigFileName(), and a null name must not cause an NPE.
        String fileName = configFileName;
        if (fileName == null || fileName.length() == 0) {
            fileName = DEFAULT_CONFIG_FILE_NAME;
        }
        System.out.println("configFileName=" + fileName);
        InputStream inputStream = Config.class.getClassLoader().getResourceAsStream(fileName);
        if (inputStream == null) {
            // Resource missing: keep the properties empty instead of failing.
            System.err.println("Configuration resource not found on classpath: " + fileName);
            return 0;
        }
        try {
            props.load(inputStream);
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        } finally {
            try {
                inputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Sets the classpath resource name to load. It only has an effect when
     * called before the first {@link #getInstance()}.
     *
     * @param cfg resource name; null or empty selects the default
     */
    public static void setConfigFileName(String cfg) {
        configFileName = cfg;
    }

    /**
     * Looks up a configuration value.
     *
     * @param keyName property key
     * @return the value, or {@code null} if the key is not present
     */
    public String getProperty(String keyName) {
        return props.getProperty(keyName);
    }
}
常量配置
package com.qqw.index;

/**
 * Configuration values read once from {@link Config} when this class is
 * initialized. A constant is {@code null} when its key is absent.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class Constant {

    /** Interval between two index builds (property {@code create_index_sleep_time}). */
    public static final String CREATE_INDEX_SLEEP_TIME = Config.getInstance()
            .getProperty("create_index_sleep_time");

    /** Directory where index files are stored (property {@code index_store_directory}). */
    public static final String INDEX_STORE_DIRECTORY = Config.getInstance()
            .getProperty("index_store_directory");

    /** JDBC driver class name (property {@code db_driver_string}). */
    public static final String DB_DRIVER_STRING = Config.getInstance()
            .getProperty("db_driver_string");

    /** JDBC connection URI (property {@code db_uri_string}). */
    public static final String DB_URI_STRING = Config.getInstance()
            .getProperty("db_uri_string");

    /** Database user name (property {@code db_username}). */
    public static final String DB_USERNAME = Config.getInstance()
            .getProperty("db_username");

    /** Database password (property {@code db_pwd}). */
    public static final String DB_PWD = Config.getInstance()
            .getProperty("db_pwd");

    /** SQL query selecting the rows to index (property {@code db_query_string}). */
    public static final String DB_QUERY_STRING = Config.getInstance()
            .getProperty("db_query_string");
}
數據類型處理類:
package com.qqw.index;

/**
 * Data type conversion helpers.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class DataTypeUtil {

    /**
     * Converts an object to a {@code long} by parsing its string form.
     * Leading and trailing whitespace is ignored, so values read from a
     * properties file with trailing blanks still parse.
     *
     * @param o source object
     * @return the parsed value, or {@code Long.MAX_VALUE} if the string form
     *         is not a valid long
     * @throws IllegalArgumentException if {@code o} is {@code null}
     */
    public static long toLong(Object o) {
        if (o == null) {
            throw new IllegalArgumentException("該對象為空");
        }
        String s = o.toString();
        if (s == null) {
            // A toString() returning null is treated like any unparsable input.
            return Long.MAX_VALUE;
        }
        try {
            return Long.parseLong(s.trim());
        } catch (NumberFormatException ex) {
            return Long.MAX_VALUE;
        }
    }
}
配置文件
#== the directory for store lucene-index ========#
index_store_directory=D\:/lucene/indexDB/
#======== two hours ========#
#create_index_sleep_time=7200000
#======== two minutes ========#
create_index_sleep_time=120000
db_driver_string=oracle.jdbc.driver.OracleDriver
db_uri_string=jdbc\:oracle\:thin\:@localhost\:1521\:orcl
db_username=test
db_pwd=test
db_query_string=SELECT * from journalarticle
核心搜索類:
package com.qqw.search;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

/**
 * Runs multi-field searches against an on-disk Lucene index and returns the
 * stored field values of each hit, with selected fields highlighted.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class LuceneDBQuery {

    /** Minimum number of top hits requested from the searcher. */
    private static final int DEFAULT_MAX_HITS = 100;

    private static LuceneDBQuery search = new LuceneDBQuery();

    private LuceneDBQuery() {
    }

    /**
     * Returns the shared instance.
     *
     * @return the singleton query helper
     */
    public static LuceneDBQuery getInstance() {
        return search;
    }

    /**
     * Searches the index, requiring a match in every search field, and returns
     * up to 100 hits.
     *
     * @param indexFields       stored fields copied into each result map
     * @param searchFields      fields the query is run against
     * @param queryString       text to search for
     * @param searchdictory     path of the index directory
     * @param highlighterFields fields whose value is replaced by a highlighted
     *                          fragment when the query matches in them
     * @return one map per hit (field name to value); {@code null} if the search
     *         failed before any result list was created
     */
    public List<Map<String,Object>> seacherStr(String[] indexFields, String[] searchFields,
            String queryString, String searchdictory, String[] highlighterFields) {
        List<Map<String,Object>> list = null;
        IndexSearcher searcher = null;
        try {
            searcher = openSearcher(searchdictory);
            // One MUST flag per search field; a fixed-size array only fits
            // callers that pass exactly two fields.
            BooleanClause.Occur[] flags = new BooleanClause.Occur[searchFields.length];
            for (int i = 0; i < flags.length; i++) {
                flags[i] = BooleanClause.Occur.MUST;
            }
            Query query = IKQueryParser.parseMultiField(searchFields, queryString, flags);
            Highlighter highlighter = createHighlighter(query);
            IKAnalyzer analyzer = new IKAnalyzer();
            TopDocs topDocs = searcher.search(query, DEFAULT_MAX_HITS);
            ScoreDoc[] hits = topDocs.scoreDocs;
            System.out.println("共有" + searcher.maxDoc() + "條索引,命中" + hits.length + "條");
            list = new ArrayList<Map<String,Object>>();
            for (int i = 0; i < hits.length; i++) {
                Document document = searcher.doc(hits[i].doc);
                list.add(toRow(document, indexFields, highlighterFields, highlighter, analyzer));
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
        return list;
    }

    /**
     * Searches the index and returns one page of hits.
     *
     * @param indexFields       stored fields copied into each result map
     * @param searchFields      fields the query is run against
     * @param queryString       text to search for
     * @param searchdictory     path of the index directory
     * @param firstResult       1-based position of the first hit of the page
     * @param maxResult         maximum number of hits in the page
     * @param highlighterFields fields whose value is replaced by a highlighted
     *                          fragment when the query matches in them
     * @return the hits of the requested page, fewer (possibly none) when the
     *         page extends past the last hit; {@code null} if the search failed
     *         before any result list was created
     */
    public List<Map<String,Object>> seacherStrbyPage(String[] indexFields, String[] searchFields,
            String queryString, String searchdictory, int firstResult, int maxResult,
            String[] highlighterFields) {
        List<Map<String,Object>> list = null;
        IndexSearcher searcher = null;
        try {
            searcher = openSearcher(searchdictory);
            Query query = IKQueryParser.parseMultiField(searchFields, queryString);
            Highlighter highlighter = createHighlighter(query);
            IKAnalyzer analyzer = new IKAnalyzer();
            int start = Math.max(firstResult - 1, 0);
            int end = firstResult + maxResult - 1;
            // Request enough hits to cover the page, so pages beyond the first
            // 100 results are reachable.
            TopDocs topDocs = searcher.search(query, Math.max(DEFAULT_MAX_HITS, end));
            ScoreDoc[] hits = topDocs.scoreDocs;
            System.out.println("共有" + searcher.maxDoc() + "條索引,命中" + hits.length + "條");
            list = new ArrayList<Map<String,Object>>();
            // Clamp to the hits actually returned; the last page is usually short.
            int limit = Math.min(end, hits.length);
            for (int i = start; i < limit; i++) {
                Document document = searcher.doc(hits[i].doc);
                list.add(toRow(document, indexFields, highlighterFields, highlighter, analyzer));
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
        return list;
    }

    /**
     * Returns the total number of documents matching the query, for paging.
     *
     * @param searchFields  fields the query is run against
     * @param queryString   text to search for
     * @param searchdictory path of the index directory
     * @return the total hit count, or 0 if the index could not be read
     * @throws Exception declared for compatibility with existing callers
     */
    public int getResultCount(String[] searchFields, String queryString, String searchdictory)
            throws Exception {
        IndexSearcher searcher = null;
        try {
            searcher = openSearcher(searchdictory);
            Query query = IKQueryParser.parseMultiField(searchFields, queryString);
            // totalHits counts every match; scoreDocs.length is capped by the
            // number of top hits requested.
            return searcher.search(query, 1).totalHits;
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
        return 0;
    }

    /** Opens a read-only searcher on the index directory, scored with IKSimilarity. */
    private IndexSearcher openSearcher(String searchdictory) throws IOException {
        IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(searchdictory)), true);
        searcher.setSimilarity(new IKSimilarity());
        return searcher;
    }

    /** Builds a highlighter that wraps matches in a span and uses 100-character fragments. */
    private Highlighter createHighlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>");
        Scorer fragmentScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /**
     * Converts one hit into a field-name to value map: raw stored values for
     * {@code indexFields}, then highlighted fragments for
     * {@code highlighterFields} (raw value when nothing was highlighted).
     */
    private Map<String,Object> toRow(Document document, String[] indexFields,
            String[] highlighterFields, Highlighter highlighter, IKAnalyzer analyzer)
            throws IOException, InvalidTokenOffsetsException {
        Map<String,Object> map = new HashMap<String, Object>();
        for (int k = 0; k < indexFields.length; k++) {
            map.put(indexFields[k], document.get(indexFields[k]));
        }
        for (int j = 0; j < highlighterFields.length; j++) {
            String fieldName = highlighterFields[j];
            String raw = document.get(fieldName);
            String hctemp = null;
            if (raw != null) {
                // getBestFragment returns null when the value contains no match.
                hctemp = highlighter.getBestFragment(analyzer, "\"" + fieldName + "\"", raw);
            }
            if (hctemp == null) {
                hctemp = raw;
            }
            map.put(fieldName, hctemp);
        }
        return map;
    }

    /** Closes the searcher, printing instead of propagating a close failure. */
    private void closeQuietly(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
配置文件管理類:
package com.qqw.search;

import java.io.IOException;
import java.net.URL;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

/**
 * Reads search-side settings from the {@code zxt_index.xml} classpath resource.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class LuceneDBQueryUtil {

    /**
     * Returns the text of the {@code index} child of the root element of
     * {@code zxt_index.xml}.
     *
     * @return the configured index path, or the empty string when the resource
     *         is missing, cannot be parsed, or has no {@code index} element
     */
    public static String getIndexPath() {
        String filePath = "zxt_index.xml";
        String indexPath = "";
        URL resource = Thread.currentThread().getContextClassLoader().getResource(filePath);
        if (resource == null) {
            System.err.println("Index configuration not found on classpath: " + filePath);
            return indexPath;
        }
        SAXBuilder builder = new SAXBuilder(false);
        try {
            Document doc = builder.build(resource);
            Element rootElement = doc.getRootElement();
            Element index = rootElement.getChild("index");
            if (index != null) {
                indexPath = index.getText();
            }
            System.out.println(indexPath);
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return indexPath;
    }
}
通過ServletContextListener配置定時任務
package com.qqw.timer;

import java.util.Timer;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import com.qqw.index.Constant;
import com.qqw.index.LuceneDBIndexerTask;

/**
 * Starts the periodic index-building task when the web application starts and
 * cancels it when the application stops.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class MyListener implements ServletContextListener {

    private Timer timer = null;

    /**
     * Schedules the indexing task to run immediately and then repeat every
     * {@code Constant.CREATE_INDEX_SLEEP_TIME} milliseconds on a daemon timer.
     */
    public void contextInitialized(ServletContextEvent event) {
        // Parse first: a bad interval must not leave a timer thread behind.
        long period = Long.parseLong(Constant.CREATE_INDEX_SLEEP_TIME);
        timer = new Timer(true);
        timer.schedule(LuceneDBIndexerTask.getInstance(), 0, period);
    }

    /** Cancels the timer, if one was started. */
    public void contextDestroyed(ServletContextEvent event) {
        // timer is still null when contextInitialized failed before creating it.
        if (timer != null) {
            timer.cancel();
            timer = null;
        }
    }
}
web.xml 配置
<?xml version="1.0" encoding="UTF-8"?> <web-app version="2.4" xmlns="http://java.sun.com/xml/ns/j2ee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd"><welcome-file-list><welcome-file>index.jsp</welcome-file></welcome-file-list><!-- 配置servlet --><servlet><servlet-name>SearchServlet</servlet-name><servlet-class>dataFromOracle.servlet.SearchServlet</servlet-class></servlet><servlet-mapping><servlet-name>SearchServlet</servlet-name><url-pattern>/SearchServlet</url-pattern></servlet-mapping><listener><listener-class>com.qqw.timer.MyListener</listener-class> </listener> </web-app> 數據庫表文件
-- Create table
create table JOURNALARTICLE
(
  ARTICLEID  NUMBER(10) not null,
  TITLE      VARCHAR2(255) not null,
  USERNAME   VARCHAR2(4000) not null,
  USERID     VARCHAR2(255) not null,
  CREATEDATE TIMESTAMP(6) not null,
  CONTENT    CLOB
);
-- Create/Recreate primary, unique and foreign key constraints
alter table JOURNALARTICLE
  add constraint ARTICLEID primary key (ARTICLEID);
通過以上的代碼,可以做到移植到新項目只需要修改配置文件即可。lucene索引建立,不需要考慮什么時候進行。只要保證數據庫連接處于正常狀態即可,索引字段和搜索字段都可以通過配置的形式表現出來。分頁功能和高亮的功能都在其中。
總結
以上是生活随笔為你收集整理的Lucene之——搜索实例的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 不同区域多机房、多台设备、微信云平台集中
- 下一篇: 时空之轮Android手柄,经典角色扮演