XMPPボット-辞書ボットの作成

Last-modified: 2014-08-16 (土) 12:41:50 (1992d)

[ 前のページ : XMPPボット-ボット起動・停止用シェルスクリプトの作成 | ]

入力単語を eow.alc.co.jp で辞書検索します。

HTMLのパースが泥くさくて申し訳ない。

xmpp-dictionarybot.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

#
# 辞書ボット
#

import xmpp
import config as CONFIG

def eow(word) :
	import urllib
	import sys,traceback
	
	viewCount = 3
	
	urlPrefix = "http://eow.alc.co.jp/"
	urlSuffix = "/UTF-8/"
	
	#cutoffBefore = u'<!-- ▼検索結果 -->'
	cutoffBefore  = u'<div id="resultList"class="mr_10">'
	cutoffBefore2 = u'<ul>'
	#cutoffAfter  = u'<!-- ▲end: 検索結果 -->'
	cutoffAfter   = u'<script language="JavaScript">'
	cutoffAfter2  = u'</ul>'

	divSeparator = u'<span class="midashi">'
	#divSeparator = u'</li><li>'
	#endOfSubject = u'</span>'
	endOfSubject = u'</span>'
	
	buffer=""
	
	url = urlPrefix + urllib.quote(word.encode('utf-8')) + urlSuffix
	
	try:
		print "url=",url
		d = urllib.urlopen(url)
		strSource = d.read()
		d.close
		strSource = strSource.decode('utf-8')
		cb = -9
		ca = -1
		try:
			cb = strSource.find(cutoffBefore)
			print "cb =",cb,
			if cb < 0 : raise
			strSource=strSource[cb:]
			
			ca = strSource.find(cutoffAfter)
			print "| ca =",ca,
			if ca < 0 : raise
			strSource=strSource[0:ca]
			
			cb = strSource.find(cutoffBefore2)
			print "| cb =",cb,
			if cb < 0 : raise
			strSource=strSource[cb:]
			
			ca = strSource.find(cutoffAfter2)
			print "| ca =",ca
			if ca < 0 : raise
			strSource=strSource[0:ca]
			
			
			divs = strSource.split(divSeparator)
			print "len(divs) =",len(divs)
			if len(divs) <= 1 : raise
			
			for i in range(1, viewCount +1) :
				subject,body = divs[i].split(endOfSubject,1)
				
				subject = stripTags(subject).strip()
				body    = stripTags(body).strip()
				
				buffer += "["+subject+"]\n"+body+"\n------------------\n"
				
			#buffer += url
		except:
			traceback.print_exc(file=sys.stdout)
			
			buffer += u"ソース解析に失敗しました\n"
			buffer += "cb ="+str(cb)+" | ca ="+str(ca)+"\n"
			buffer += u"-------------------------\n"
			buffer += strSource+"\n"
		
	except:
		traceback.print_exc(file=sys.stdout)
		
		buffer += u"URLが開けません.\n"
		
	buffer += url
	return buffer

def stripTags(s):
	"""Return `s` without markup tags.

	Every character from a '<' up to and including the next '>' is dropped;
	a '>' that appears outside a tag is dropped as well. No entity decoding
	is performed.
	"""
	kept = []
	insideTag = False
	for ch in s:
		if ch == '<':
			insideTag = True
		elif ch == '>':
			insideTag = False
		elif not insideTag:
			kept.append(ch)
	return ''.join(kept)

def parseMessage(conn,mess):
	"""Message handler: look up the message body with eow() and reply to the sender.

	Args:
		conn: Connection the reply is sent through.
		mess: Incoming message. Its body is used as the search term and its
			sender becomes the recipient of the reply.
	"""
	text=mess.getBody()
	
	# Messages without a body carry nothing to look up.
	if text is None: return
	
	# Surrounding whitespace would otherwise be URL-quoted into the query;
	# a message that is blank after trimming is ignored.
	text=text.strip()
	if not text: return
	
	user=mess.getFrom()
	conn.send(xmpp.Message(user,eow(text)))
	
class ConnectionError(Exception):
	"""Raised by Bot when connecting to the XMPP server fails."""

class AuthorizationError(Exception):
	"""Raised by Bot when authenticating with the XMPP server fails."""

class Bot:
	"""XMPP client that hands every incoming message to parseMessage()."""

	def __init__(self, JID, Password,Server,Port):
		"""Connect, authenticate, register the message handler and send presence.

		Args:
			JID: Full JID of the bot account; its domain and node parts are
				used for the client and for authentication.
			Password: Password for that account.
			Server: Host to connect to.
			Port: Port to connect to.

		Raises:
			ConnectionError: connect() reported failure.
			AuthorizationError: auth() reported failure.
		"""
		jid = xmpp.JID(JID)
		self.connection = xmpp.Client(jid.getDomain(), debug=[])
		
		# Test for any falsy result rather than None only: xmpppy documents
		# connect() as returning '' on failure, which an `is None` check
		# would let through.
		result = self.connection.connect(server=(Server,Port))
		if not result: raise ConnectionError
		result = self.connection.auth(jid.getNode(), Password)
		if not result: raise AuthorizationError
		
		self.connection.RegisterHandler('message',parseMessage)
		self.connection.sendInitPresence()
		
	def loop(self):
		"""Process incoming stanzas until Process() returns a false value or Ctrl-C."""
		try:
			# Process(1) is called repeatedly; the loop ends as soon as it
			# returns a false value.
			while self.connection.Process(1):
				pass
		except KeyboardInterrupt:
			# Ctrl-C is the normal way to stop the bot; exit quietly.
			pass

# Start the bot: the entries of CONFIG.account are passed to Bot as keyword
# arguments, then loop() blocks until Process() returns a false value or
# Ctrl-C is pressed.
bot = Bot(**CONFIG.account)
bot.loop()

config.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

# Account settings for the bot. xmpp-dictionarybot.py expands this dict into
# keyword arguments for Bot(JID, Password, Server, Port), so the keys must
# match those parameter names exactly.
account = {
	# Full JID the bot logs in as.
	'JID'     : 'dictionary-bot@example.com',
	# Password for that JID (placeholder value here).
	'Password': 'PASSWORD',
	# Host and port handed to connect() as the server address.
	'Server'  : '127.0.0.1',
	'Port'    : 5222,
}