脚本之家,脚本语言编程技术及教程分享平台!
分类导航

Python|VBS|Ruby|Lua|perl|VBA|Golang|PowerShell|Erlang|autoit|Dos|bat|

服务器之家 - 脚本之家 - Python - 对python 操作solr索引数据的实例详解

对python 操作solr索引数据的实例详解

2021-04-26 00:30shaomine Python

今天小编就为大家分享一篇对 Python 操作 Solr 索引数据的实例详解,具有很好的参考价值,希望对大家有所帮助。一起跟随小编过来看看吧。

测试代码1:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def test(self):
    """Post one hard-coded sample document to the Solr update handler.

    Sends a JSON ``add`` command to the ``mycore`` core on 127.0.0.1:8983
    and prints the body of the server's reply.

    :return: None
    """
    # NOTE(review): the key "*字段名*" ("*field name*") looks like a
    # placeholder for a real schema field -- confirm before running.
    data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
    # Solr parameter names are case-sensitive: the option is ``commitWithin``.
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"content-type": "application/json"}
    r = requests.post(url, json=data, params=params, headers=headers)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(r.text)
 
 
 def index_data(self):
  solr = pysolr.solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
 
  # how you'd index data.
  result = solr.add([
   {
    "id": "doc_1",
    "title": "a test document",
   },
   {
    "id": "doc_2",
    "title": "the banana: tasty or dangerous?",
   },
  ])
  print result

测试代码2:

实际数据:

对python 操作solr索引数据的实例详解

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def index_data_fromcsv(self, csvfile):
    """Read rows from a CSV file and post each one to Solr as its own document.

    The first row is treated as the header and is not indexed. For every
    other row the server's reply is printed; after each row (header
    included) the running row index is printed as a progress marker.

    :param csvfile: path of the CSV file, including directories
    :return: None
    """
    def to_text(value):
        # Only byte strings need decoding (every ``str`` on Python 2);
        # text values, as csv yields on Python 3, pass through unchanged.
        return value.decode('gb2312') if isinstance(value, bytes) else value

    rows = csvop.readcsv(csvfile)
    # Solr parameter names are case-sensitive: the option is ``commitWithin``.
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"content-type": "application/json"}
    for index, row in enumerate(rows):
        if index > 0:  # row 0 is the header line
            try:
                # A fresh dict per row keeps rows independent of each other.
                doc = {
                    'title': to_text(row[0]),
                    'link': row[1],
                    # column 2 (date) is deliberately left out of the document
                    'source': to_text(row[3]),
                    'keyword': to_text(row[4]),
                }
                data = {"add": {"doc": doc}}
                r = requests.post(url, json=data, params=params, headers=headers)
                print(r.text)
            except Exception as e:
                # Best effort: report the failing row and carry on with the rest.
                print(e)

        print(index)
 
#pysolr客户端代码
 def pysolr_index_data_fromcsv(self, csvfile,url='http://127.0.0.1:8983/solr/mycore/'):
  '''
   从csv文件中读取数据,并索引到solr中
   :param csvfile: csv文件,包括完整路径
   :return:
   '''
  list = csvop.readcsv(csvfile)
  index = 0
  listdocs = []
  for item in list:
   if index > 0: # 第一行是标题
    doc = {}
    try:
     doc['title'] = item[0].decode('gb2312')
     doc['link'] = item[1]
     # doc['date'] = item[2]
     doc['source'] = item[3].decode('gb2312')
     doc['keyword'] = item[4].decode('gb2312')
     listdocs.append(doc)
    except exception,e:
     print e.message
   index += 1
  solr = pysolr.solr(url, timeout=10)
  result = solr.add(listdocs)
  print result

查询代码:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def search_data(self, message='视频'):
    """Query the core's select handler over HTTP for titles matching *message*.

    Prints the raw JSON reply, then ``<message>:<number of hits>``.

    :param message: text to look for in the ``title`` field
    :return: None
    """
    url = 'http://127.0.0.1:8983/solr/mycore/select'
    # Passing the query through ``params`` lets requests URL-encode it.
    # The backslash before the term is sent to Solr literally, as before;
    # it is now written as ``\\`` because ``\%`` is not a valid escape.
    query = {'q': 'title:"\\%s"' % message, 'wt': 'json', 'indent': 'true'}
    r = requests.get(url, params=query, verify=False)
    print(r.text)
    # Solr reports the hit count under the case-sensitive key ``numFound``.
    hits = r.json()['response']['numFound']
    print(message + ":" + str(hits))
  
  #pysolr客户端
  def search_data(self,where='视频',url='http://127.0.0.1:8983/solr/mycore/'):
  solr = pysolr.solr(url, timeout=10)
  dict = {'start':10,'rows': 30,'fl':'title,keyword,source,link'}
  result = solr.search('title:视频',**dict)
  # result = solr.search('title:视频')
  # print result.raw_response['response']['numfound']
 
  for item in result:
   print 'keyword: %s'% item['keyword']
   print 'title: %s'% item['title']
   print 'source: %s'% item['source']
   print 'link: %s'% item['link']
   print '

'

输出结果:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
{
 "responseHeader":{
 "status":0,
 "QTime":0,
 "params":{
  "q":"title:\"\\视频\"",
  "indent":"true",
  "wt":"json"}},
 "response":{"numFound":123,"start":0,"docs":[
  {
  "source":"中彩网",
  "link":"http://www.zhcw.com/video/kaijiangshipin-3d/11981126.shtml",
  "keyword":"视频",
  "title":"福彩3d开奖 视频 -中彩 视频",
  "id":"2f0a9d21-3771-4efa-a0cc-e0484cc97993",
  "_version_":1584214368617234432},
  {
  "source":"新浪视频",
  "link":"http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1",
  "keyword":"视频",
  "title":"今日热门 视频 汇总20170707",
  "id":"c8aae0af-01e9-491f-b999-24b97004a4ba",
  "_version_":1584214367507841024},
  {
  "source":"网易新闻",
  "link":"http://news.163.com/17/0707/13/coocnuie00018aor.html",
  "keyword":"视频",
  "title":"网传\"兰桂坊附近不雅 视频 \" 警方:传播 视频 将追责",
  "id":"353de48d-ede7-481b-89d3-bc20ab4b3884",
  "_version_":1584214367821365248},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7480871.shtml",
  "keyword":"视频",
  "title":"创想动画片:花粉过敏症的痛谁懂-凤凰 视频 -最具媒体品质的综合 视频 ...",
  "id":"dc5f19c4-180f-4004-a0db-4499d875a60f",
  "_version_":1584214366819975168},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7805858.shtml",
  "keyword":"视频",
  "title":"节气说:小暑时节就该这样养生-凤凰 视频 -最具媒体品质的综合 视频 门...",
  "id":"5e9eb7a7-48b8-4e41-9514-7712ae619d9a",
  "_version_":1584214367516229632},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7483506.shtml",
  "keyword":"视频",
  "title":"听导演讲《神奇女侠》的故事 -凤凰 视频 -最具媒体品质的综合 视频 门户-...",
  "id":"6b1482f1-c0c9-479f-bef7-7de324fb9372",
  "_version_":1584214367647301632},
  {
  "source":"汽车杂志",
  "link":"http://www.jiemian.com/article/1445267.html",
  "keyword":"视频",
  "title":"【视频】欧宝最近找了一堆穿睡衣的辣妈拍了一段超牛的视频",
  "id":"1d327555-a6f3-4513-9a21-43d59418ab82",
  "_version_":1584214368157958144},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"7d777870-93cb-4c18-a32b-734af8f133f1",
  "_version_":1584213891451191296},
  {
  "source":"新浪汽车",
  "link":"http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml",
  "keyword":"视频",
  "title":"视频 :两大神车pk!高尔夫思域怎么选?",
  "id":"3a50b303-6b54-4da3-aee1-a61c678c752d",
  "_version_":1584213892090822656},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"01da8e11-77bc-4c31-ba3a-ba668e846d9d",
  "_version_":1584214366191878144}]
 }}

完整代码:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#-*- coding: utf-8 -*-
import csv
import os
import codecs
 
 
def readcsv(filename):
    """Load every row of a CSV file into a list.

    :param filename: path of the CSV file to read
    :return: list of rows, each row being the list of its column values;
             an empty list when *filename* does not exist, so callers can
             iterate over the result without checking for ``None``
    """
    if not os.path.exists(filename):
        return []
    with open(filename, 'r') as f:
        # list() drains the reader before the file is closed.
        return list(csv.reader(f))
 
#################################################
#coding=utf-8
import json
import requests
 
import os
import time
from os import walk
import csvop
from datetime import datetime
import pysolr
import math
 
class solrclientobj:
    """Demo client that indexes, searches and deletes documents in a Solr core.

    Two access styles are shown side by side: raw HTTP requests against the
    JSON update handler (``requests``) and the ``pysolr`` client. Every
    method prints the server's answer instead of returning it.
    """

    # JSON update endpoint used by the raw-HTTP methods.
    UPDATE_URL = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'

    @staticmethod
    def _decode(value, encoding='gb2312'):
        """Return *value* as text, decoding it from *encoding* if it is a byte string."""
        # Every ``str`` is a byte string on Python 2; on Python 3 text
        # values pass through unchanged.
        if isinstance(value, bytes):
            return value.decode(encoding)
        return value

    @classmethod
    def _row_to_doc(cls, row):
        """Build a Solr document from one CSV row (title, link, date, source, keyword)."""
        return {
            'title': cls._decode(row[0]),
            'link': row[1],
            # column 2 (date) is deliberately left out of the document
            'source': cls._decode(row[3]),
            'keyword': cls._decode(row[4]),
        }

    def _post_document(self, doc):
        """Send *doc* to the update handler as a JSON ``add`` command; return the response."""
        # Solr parameter names are case-sensitive: the option is ``commitWithin``.
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        headers = {"content-type": "application/json"}
        return requests.post(self.UPDATE_URL, json={"add": {"doc": doc}},
                             params=params, headers=headers)

    def test(self):
        """Post one hard-coded sample document and print the server's reply."""
        # NOTE(review): the key "*字段名*" ("*field name*") looks like a
        # placeholder for a real schema field -- confirm before running.
        r = self._post_document({"id": "100001", "*字段名*": u"我是一个大好人"})
        print(r.text)

    def pysolr_index_data_fromcsv(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
        """Read rows from a CSV file and index them in one batch through pysolr.

        The first row is treated as the header and is not indexed. Rows that
        cannot be converted are reported and skipped. The value returned by
        ``add`` is printed.

        :param csvfile: path of the CSV file, including directories
        :param url: base URL of the Solr core
        :return: None
        """
        rows = csvop.readcsv(csvfile)
        listdocs = []
        for index, row in enumerate(rows):
            if index == 0:  # row 0 is the header line
                continue
            try:
                listdocs.append(self._row_to_doc(row))
            except Exception as e:
                # Best effort: report the bad row and keep the remaining ones.
                print(e)
        # The client class is ``pysolr.Solr`` (capital S).
        solr = pysolr.Solr(url, timeout=10)
        result = solr.add(listdocs)
        print(result)

    def index_data_fromcsv(self, csvfile):
        """Read rows from a CSV file and post each one to Solr as its own document.

        The first row is treated as the header and is not indexed. For every
        other row the server's reply is printed; after each row (header
        included) the running row index is printed as a progress marker.

        :param csvfile: path of the CSV file, including directories
        :return: None
        """
        rows = csvop.readcsv(csvfile)
        for index, row in enumerate(rows):
            if index > 0:  # row 0 is the header line
                try:
                    r = self._post_document(self._row_to_doc(row))
                    print(r.text)
                except Exception as e:
                    # Best effort: report the failing row and carry on.
                    print(e)

            print(index)

    def index_data(self):
        """Index two hard-coded sample documents through pysolr and print the result."""
        solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)

        sample_docs = [
            {
                "id": "doc_1",
                "title": "a test document",
            },
            {
                "id": "doc_2",
                "title": "the banana: tasty or dangerous?",
            },
        ]
        result = solr.add(sample_docs)
        print(result)

    def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
        """Search the ``title`` field through pysolr and print each hit.

        Requests 30 rows starting at offset 10, limited to the fields
        ``title``, ``keyword``, ``source`` and ``link``.

        :param where: term to look for in the ``title`` field
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
        # Use the caller's term; the query used to be hard-coded to the default.
        result = solr.search('title:%s' % where, **options)

        for item in result:
            print('keyword: %s' % item['keyword'])
            print('title: %s' % item['title'])
            print('source: %s' % item['source'])
            print('link: %s' % item['link'])
            print('    ')

    def delete_index_data(self, where, url='http://127.0.0.1:8983/solr/mycore/'):
        """Delete every document matching a query and print the result.

        :param where: Solr query selecting the documents to delete
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        # solr.delete(id=where) would instead delete the document whose id is ``where``.
        result = solr.delete(q=where)  # q='*:*' deletes every document
        print(result)
 
 
# Demo driver: creates a client and runs a title search for '视频'.
# There is no ``__main__`` guard, so this also runs when the module is imported.
# The commented-out calls show the other operations of the class.
obj = solrclientobj()
# obj.delete_index_data('*:*')  # delete every document in the index
# obj.index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('视频')
# csvfile = 'd:/work/solr/other/exportexcels/2017-07-07_info.csv'
# obj.pysolr_index_data_fromcsv(csvfile)

以上这篇对 Python 操作 Solr 索引数据的实例详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持服务器之家。

原文链接:https://www.cnblogs.com/shaosks/p/7845576.html

延伸 · 阅读

精彩推荐