TwitterのUserStreamをHTMLに変換するCGIを書いた
TwitterのUserStreamをHTMLに変換するCGIを書いた。適当なサーバー(専用サーバーかVPSサーバーじゃないとプログラムの実行時間に制限があって無理かも)でログ保存用のスクリプトを動かしてログを保存しておけば、Twilogのように使える。Twilogと違って単にHTMLを表示するだけだけど、自分以外の発言もフォローやお気に入り追加も記録できる。
こんな感じ。
ログ取得
#! /usr/bin/python # coding: utf-8 import tweepy, urllib, urllib2, datetime, time # tweepyの解説をしているページを参考に取得 consumer_key = "XXXXXXXXXXXXXXXXXXXXXX" consumer_secret = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" access_key = "99999999-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" access_secret = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" def log(s): f = "log_%s.txt" % datetime.datetime.today().strftime("%Y%m%d") open(f,"a").write(s+"\n") try: print s except UnicodeEncodeError: pass fail = 0 def retrieve(): url = "https://userstream.twitter.com/2/user.json" param = {"delimited":"length"} header = {} auth = tweepy.OAuthHandler(consumer_key,consumer_secret) auth.set_access_token(access_key,access_secret) auth.apply_auth(url,"POST",header,param) req = urllib2.Request(url,headers=header) r = urllib2.urlopen(req,urllib.urlencode(param),90) while True: l = "" while True: c = r.read(1) if c=="\n": break if c=="": raise Exception l += c l = l.strip() if not l.isdigit(): continue global fail fail = 0 s = r.read(int(l)).rstrip("\r\n") log(s) def main(): while True: try: retrieve() except: log("ERROR") global fail fail += 1 if fail>=2: time.sleep(min(20*2**(fail-2),240)) if __name__=="__main__": main()
ログ表示
#! /usr/bin/python # coding: utf-8 import cgi, datetime, os, json, re import cgitb; cgitb.enable() # ログファイル名を設定 %sに日付が入る LOG_FILE = r"log_%s.txt" # この日付以降のログを表示 SINCE = datetime.date(2011,1,1) def main(): if "PATH_INFO" not in os.environ or os.environ["PATH_INFO"]=="" or os.environ["PATH_INFO"]=="/": # 日付選択画面に飛ばす print 'Location: %s/index.html' % os.path.basename(__file__) print return if os.environ["PATH_INFO"]=="/index.html": # 日付選択画面 print 'Content-Type: text/html; charset=utf-8;' print print '<!DOCTYPE html>' print '<html><head><meta charset="utf-8"><title>Log Viewer</title></head><body>' d = datetime.date.today() t = SINCE while d>=t: ds = d.strftime("%Y%m%d") print '<p><a href="%s.html">%s</a> <a href="%s.txt">raw</a></p>' % (ds,d,ds) d -= datetime.timedelta(days=1) print '</body></html>' return # 指定した日付のログを表示 m = re.match(r"^/(\d{8})\.(html|txt)",os.environ["PATH_INFO"]) if m: date = m.group(1) ext = m.group(2) try: f = open(LOG_FILE%date) except: print 'Status: 404' print print '<html><head><meta charset="utf-8"><title>Not found</title></head>' print '<body><p>Not found</p></body></html>' return if ext=="html": print 'Content-Type: text/html; charset=utf-8;' print print "<!DOCTYPE html>" print '<html><head><meta charset="utf-8"><title>Log Viewer</title><style type="text/css">' print '*{margin 0;padding 0;line-height:1.5em;}' print 'body{background:#c0deed;color:#333;}' print 'a{color:#0084b4;text-decoration:none;}' print 'a:hover{text-decoration:underline;}' print 'div.page{width:640px;margin:auto;}' print 'div.item{display:table;width:100%;background:white;margin:0.5em 0.5em 0.5em 1em;border:solid #888 1px;padding:0.2em;}' print 'div.image{display:table-cell;width:50px;padding:0.5em; vertical-align:middle;background:#eee;}' print 'div.content{display:table-cell;vertical-align:top;padding:0 0 0 0.5em;}' print 'div.footer{color:#999;font-size:small;}' print 'div.footer a{color:#999;}' print 'div.footer a:hover{color:#0084b4;}' print 'div.dmto{margin-left:1em;}' print 'span.name{color:#999;font-size:small;}' print 'span.name a{color:#333;font-size:medium;font-weight:bold;}' print 'span.name a:hover{color:#0084b4;}' print 'span.event{color:#888;font-weight:bold;}' print '</style></head><body><div class="page">' t = list(f) for l in t[::-1]: print tweet2html(l).encode('utf-8') print '</div></body></html>' if ext=="txt": print 'Content-Type: text/plain; charset=utf-8;' print print f.read() return raise Exception("Parameter error") def tweet2html(t): try: if re.match('ERROR',t): return '<div class="item">ERROR</div>' tw = json.loads(t) s = tweet2html_sub(tw) return s except Exception, e: return '<div>Error %s</div><div>%s</div>' % (e,t) def tweet2html_sub(tw): s = '' if False: pass elif "friends" in tw: # UserStream開始時のフレンドリスト pass elif "retweeted_status" in tw: # リツイート u = tw["user"] s += '<div class="item">' s += '<div>' s += '<span class="event">RETWEET</span> ' s += '<a name="%s"><img width=24 height=24 src="%s"></a> ' % (tw["id"],u["profile_image_url"]) s += '<span class="name"><a href="https://twitter.com/%s">%s</a> %s</span> ' % (u["screen_name"],u["screen_name"],u["name"]) s += '</div>' s += tweet2html_sub(tw["retweeted_status"]) s += u'<div class="footer"><a href="https://twitter.com/%s/status/%s">%s</a> %sから</div>' % (u["screen_name"],tw["id"],time2local(tw["created_at"]),tw["source"]) s += u'</div>' elif "event" in tw and ( tw["event"]=="follow" or tw["event"]=="user_update" or tw["event"]=="block" or tw["event"]=="unblock" ): # フォロー/ユーザーアップデート/ブロック/アンブロック sr = tw["source"] tr = tw["target"] s += '<div class="item">' s += '<div>' s += '<span class="event">%s</span> ' % tw["event"].upper() s += '<img width=24 height=24 src="%s"> ' % sr["profile_image_url"] s += '<span class="name"><a href="https://twitter.com/%s">%s</a> %s</span> ' % (sr["screen_name"],sr["screen_name"],sr["screen_name"]) s += '</div>' s += '<div class="item">' s += '<div class="image"><img width=48 height=48 src="%s"></div>' % tr["profile_image_url"] s += '<div class="content">' s += '<div><span class="name"><a href="https://twitter.com/%s">%s</a> %s</span></div>' % (tr["screen_name"],tr["screen_name"],tr["name"]) s += '<div>%s</div>' % tr["description"] s += '</div>' s += '</div>' s += '<div class="footer">%s</div>' % time2local(tw["created_at"]) s += '</div>' elif "event" in tw and ( tw["event"]=="favorite" or tw["event"]=="unfavorite" ): # お気に入り/お気に入り解除 sr = tw["source"] s += '<div class="item">' s += '<div>' s += '<span class="event">%s</span> ' % tw["event"].upper() s += '<img width=24 height=24 src="%s"> ' % sr["profile_image_url"] s += '<span class="name"><a href="https://twitter.com/%s">%s</a> %s</span> ' % (sr["screen_name"],sr["screen_name"],sr["screen_name"]) s += '</div>' s += tweet2html_sub(tw["target_object"]) s += '<div class="footer">%s</div>' % time2local(tw["created_at"]) s += '</div>' elif "event" in tw and ( tw["event"]=="list_member_added" or tw["event"]=="list_member_removed" ): # リスト追加/削除 sr = tw["source"] tr = tw["target_object"] s += '<div class="item">' s += '<div>' s += '<span class="event">%s</span> ' % tw["event"].upper().replace("_"," ") s += '<img width=24 height=24 src="%s"> ' % sr["profile_image_url"] s += '<span class="name"><a href="https://twitter.com/%s">%s</a> %s</span> ' % (sr["screen_name"],sr["screen_name"],sr["screen_name"]) s += '</div>' s += '<div class="item">' s += '<div class="image"><img width=48 height=48 src="%s"></div>' % tr["user"]["profile_image_url"] s += '<div class="content">' s += '<div><a href="https://twitter.com/%s">%s</a></div>' % (tr["uri"],tr["full_name"]) s += '<div>%s</div>' % tr["description"] s += '</div>' s += '</div>' s += '<div class="footer">%s</div>' % time2local(tw["created_at"]) s += '</div>' elif "direct_message" in tw: # ダイレクトメッセージ rec = tw["direct_message"]["recipient"] send = tw["direct_message"]["sender"] s += '<div class="item">' s += '<div><span class="event">DIRECT MESSAGE</span></div>' s += '<div class="dmto">TO: ' s += '<img width=24 height=24 src="%s"> ' % rec["profile_image_url"] s += '<span class="name"><a href="https://twitter.com/%s">%s</a> %s</span> ' % (rec["screen_name"],rec["screen_name"],rec["screen_name"]) s += '</div>' s += '<div class="item">' s += '<div class="image"><img width=48 height=48 src="%s"></div>' % (send["profile_image_url"]) s += '<div class="content">' s += '<div><span class="name"><a href="https://twitter.com/%s">%s</a> %s</span></div>' % (send["screen_name"],send["screen_name"],send["name"]) s += '<div>%s</div>' % tw["direct_message"]["text"] s += '<div class="footer">%s</div>' % time2local(tw["direct_message"]["created_at"]) s += '</div>' s += '</div>' s += '</div>' elif "delete" in tw: # 削除 d = tw["delete"]["status"] s += '<div class="item">' s += '<div>' s += '<span class="event">DELETE</span> ' s += 'UserID: %s' % d["user_id"] s += '</div>' s += '<div class="item">StatusID: <a href="#%s">%s</a></div>' % (d["id"],d["id"]) s += '</div>' else: # 通常のツイート u = tw["user"] # リンクを処理 text = tw["text"] if "entities" in tw: entity = [] for x in tw["entities"]["hashtags"]: entity += [(x["indices"],"https://twitter.com/search?q=%23"+x["text"])] for x in tw["entities"]["user_mentions"]: entity += [(x["indices"],"https://twitter.com/"+x["screen_name"])] # TODO: 本家は<a href="url">expanded_url</a> for x in tw["entities"]["urls"]: entity += [(x["indices"],x["expanded_url"] if x["expanded_url"] else x["url"])] entity.sort() for x in entity[::-1]: text = text[:x[0][0]] + '<a href="%s">%s</a>'%(x[1],text[x[0][0]:x[0][1]]) + text[x[0][1]:] s = '' s += '<div class="item">' s += '<div class="image"><a name="%s"><img width=48 height=48 src="%s"></a></div>' % (tw["id"],u["profile_image_url"]) s += '<div class="content">' s += '<div><span class="name"><a href="https://twitter.com/%s">%s</a> %s</span></div>' % (u["screen_name"],u["screen_name"],u["name"]) s += '<div>%s</div>' % text s += '<div class="footer">' s += '<a href="https://twitter.com/%s/status/%s">%s</a> ' % (u["screen_name"],tw["id"],time2local(tw["created_at"])) s += u'%sから ' % tw["source"] if tw["in_reply_to_status_id"]!=None: s += u'<a href="#%s">%s宛</a>' % (tw["in_reply_to_status_id"],tw["in_reply_to_screen_name"]) s += u'<a href="https://twitter.com/%s/status/%s">tw</a> ' % (tw["in_reply_to_screen_name"],tw["in_reply_to_status_id"]) retweet_count = tw["retweet_count"] if retweet_count!=0: if retweet_count!="100+": retweet_count += 1 s += u'%s回リツイート ' % retweet_count if tw["geo"]!=None and "coordinates" in tw["geo"]: c = tw["geo"]["coordinates"] s += u'<a href="https://maps.google.com/maps?q=%s,%s">ここ▼</a>から ' % (c[0],c[1]) s += '</div>' s += '</div>' s += '</div>' return s def time2local(t): tm = datetime.datetime.strptime(t,"%a %b %d %H:%M:%S +0000 %Y") tm += datetime.timedelta(hours=9) return str(tm) if __name__=="__main__": main()
- 2011/03/31 受信が止まるけど例外が発生せず再接続できない時があるので、read(1)が空文字列を返したらエラーにしてみる
- 2011/03/29 unblockに対応