#!/usr/bin/python
# -*- coding: utf-8 -*-
import os, sys, re
import wikipedia, query, datetime, BeautifulSoup
# Bot script: render Template:Dyk through the API, pull the first entry of
# each <ul> in the rendered HTML, and prepend it as a new <item> to the RSS
# wiki page Template:Dyk/rss unless the entry's title is already present
# in that page.

mysite = wikipedia.getSite()
dyktext = u'{{Template:Dyk}}'
dyktitle = u'Template:Dyk'
rsstitle = u'Template:Dyk/rss'
rsspage = wikipedia.Page(mysite, rsstitle)
# NOTE(review): dykpage is not used anywhere in the visible script; kept so
# the module-level names stay unchanged.
dykpage = wikipedia.Page(mysite, dyktitle)
params = {
    'action': 'parse',
    'text': dyktext,
}

# Captures the inner markup of an <li> element as the named group "li".
li_pattern = u'<li>(?P<li>.*?)</li>'
item_separator = u'<item>'
# The feed text is split on "<item>", so chunk 0 is whatever precedes the
# first item.  Once the split feed already has this many chunks, the last
# chunk is dropped whenever a new item is inserted.
max_feed_chunks = 7
# The dropped last chunk also carried the closing tags, so they are
# appended again after trimming.
feed_trailer = u'\n</channel>\n</rss>'

try:
    text = query.GetData(params, encodeTitle=False)[u'parse'][u'text'][u'*']
    soup = BeautifulSoup.BeautifulSoup(text)
    image = u''
    for node in soup:
        try:
            if node.name == 'ul':
                if node.small is not None:
                    # Strip the <small> element from the list, then use
                    # the contents of the first <div> of the parsed
                    # document as the leading markup of the description.
                    node.small.extract()
                    image = unicode(soup.div.renderContents())
                else:
                    image = u''
                for m in re.finditer(li_pattern, unicode(node.li), re.I):
                    mm = m.groupdict()
                    # Title attribute of the link inside the first <b> of
                    # this list; raises KeyError when the link has no
                    # title and AttributeError when <b> is missing.
                    title = node.b.a['title']
                    href = u'{{subst:fullurl:%s}}' % title
                    rtext = image+u'<p>你知道吗?</p>\n<p>'+mm['li'] + u'</p><p><a href=\"'+href+ u'\">阅读条目全文 >>></a></p>'
                    # No leading "<item>" here: it is supplied by the
                    # join on item_separator below.
                    wikitext = u'<title>新条目推荐:%s</title>\n<link>%s</link>\n<guid>%s</guid>\n<description>%s</description>\n<pubDate>{{subst:#time:r}}</pubDate>\n<dc:creator>中文维基百科编者</dc:creator></item>\n' % (title, href, href, rtext)
                    rsstext = rsspage.get(force=True)
                    ritem = rsstext.split(item_separator)
                    if title not in rsstext:
                        feed_is_full = len(ritem) >= max_feed_chunks
                        # Index 1 puts the new entry right after the
                        # leading chunk, i.e. first among the items.
                        ritem.insert(1, wikitext)
                        if feed_is_full:
                            ritem.pop()
                        s = item_separator.join(ritem)
                        if feed_is_full:
                            s = s + feed_trailer
                        rsspage.put(s, u'Bot:你知道吗rss更新: [[%s]]' % title)
                    else:
                        print("DYK no update.")
        except (AttributeError, KeyError):
            # Deliberate best-effort skip of nodes that do not have the
            # expected shape.  The tuple is required: the former
            # "except AttributeError, KeyError" caught only
            # AttributeError and bound the exception to the name
            # KeyError, so a missing title attribute crashed the run.
            pass
finally:
    # Always run the framework's shutdown hook, even when the API call or
    # the page save raises.
    wikipedia.stopme()