TwitterのUserStreamをHTMLに変換するCGIを書いた

TwitterのUserStreamをHTMLに変換するCGIを書いた。適当なサーバー(専用サーバーかVPSでないと、プログラムの実行時間に制限があって無理かも)でログ保存用のスクリプトを動かし続けておけば、Twilogのように使える。Twilogと違って単にHTMLを表示するだけだけど、自分以外の発言や、フォロー・お気に入り追加といったイベントも記録できる。

こんな感じ。

ログ取得

#! /usr/bin/python
# coding: utf-8

import tweepy, urllib, urllib2, datetime, time

# OAuth credentials; obtain them by following a page that explains tweepy
consumer_key    = "XXXXXXXXXXXXXXXXXXXXXX"
consumer_secret = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
access_key      = "99999999-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
access_secret   = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

def log(s):
    """Append one line to today's log file and echo it to stdout.

    The file is named ``log_YYYYMMDD.txt`` after the current local date and
    is opened in append mode relative to the current working directory.

    s -- the line to record, without a trailing newline.
    """
    f = "log_%s.txt" % datetime.datetime.today().strftime("%Y%m%d")
    # Close the handle deterministically instead of relying on garbage
    # collection; this function is called once per received message.
    with open(f, "a") as out:
        out.write(s + "\n")
    try:
        # With a single parenthesised argument this is equivalent to the
        # Python 2 print statement.
        print(s)
    except UnicodeEncodeError:
        # The console cannot represent the text; the line is already on disk.
        pass

# Number of consecutive failures: retrieve() resets it to 0 whenever it reads
# a message length line, main() increments it and uses it for the back-off.
fail = 0

def retrieve():
    """Connect to the Twitter UserStream and log every message received.

    Sends a POST with ``delimited=length`` so that each message is preceded
    by a line holding its length; every payload is handed to log().
    The module-level credentials are passed to tweepy, which presumably
    fills ``header`` with the OAuth authorization data.

    Never returns normally: raises Exception when the stream ends or a
    payload is cut short, and lets any network error propagate, so the
    caller can reconnect. The connection is always closed on the way out.
    Resets the global ``fail`` counter whenever a length line is received.
    """
    global fail

    url = "https://userstream.twitter.com/2/user.json"
    param = {"delimited":"length"}
    header = {}

    auth = tweepy.OAuthHandler(consumer_key,consumer_secret)
    auth.set_access_token(access_key,access_secret)
    auth.apply_auth(url,"POST",header,param)

    req = urllib2.Request(url,headers=header)
    r = urllib2.urlopen(req,urllib.urlencode(param),90)

    try:
        while True:
            # Read the length line one byte at a time.
            # NOTE(review): kept deliberately; larger buffered reads may
            # block on a live stream until the buffer fills - confirm
            # before replacing with readline().
            chars = []
            while True:
                c = r.read(1)
                if c == "\n":
                    break
                if c == "":
                    # An empty read means the server closed the stream.
                    raise Exception("UserStream connection closed")
                chars.append(c)
            l = "".join(chars).strip()
            if not l.isdigit():
                # Blank or non-numeric lines carry no payload; skip them.
                continue

            fail = 0
            size = int(l)
            data = r.read(size)
            if len(data) < size:
                # Do not write a truncated message into the log.
                raise Exception("UserStream payload truncated")
            log(data.rstrip("\r\n"))
    finally:
        # Release the socket before the caller reconnects.
        r.close()

def main():
    """Run retrieve() forever, reconnecting after every failure.

    Each failure appends an "ERROR" line to the log and increments the
    global ``fail`` counter. The first failure reconnects immediately; from
    the second consecutive failure on, the loop sleeps 20 seconds, doubling
    with each further failure, capped at 240 seconds.
    """
    global fail
    while True:
        try:
            retrieve()
        except Exception:
            # Catch Exception rather than everything, so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit can still stop the logger.
            log("ERROR")
            fail += 1
            if fail>=2:
                time.sleep(min(20*2**(fail-2),240))

# Start the logger only when this file is executed as a script
if __name__=="__main__":
    main()

ログ表示

#! /usr/bin/python
# coding: utf-8

import cgi, datetime, os, json, re
import cgitb; cgitb.enable()

# Log file name pattern; %s is replaced with the date
LOG_FILE    = r"log_%s.txt"
# Show logs from this date onward
SINCE       = datetime.date(2011,1,1)

def main():
    if "PATH_INFO" not in os.environ or os.environ["PATH_INFO"]=="" or os.environ["PATH_INFO"]=="/":
        # 日付選択画面に飛ばす
        print 'Location: %s/index.html' % os.path.basename(__file__)
        print
        return
    
    if os.environ["PATH_INFO"]=="/index.html":
        # 日付選択画面
        print 'Content-Type: text/html; charset=utf-8;'
        print 
        print '<!DOCTYPE html>'
        print '<html><head><meta charset="utf-8"><title>Log Viewer</title></head><body>'
        d = datetime.date.today()
        t = SINCE
        while d>=t:
            ds = d.strftime("%Y%m%d")
            print '<p><a href="%s.html">%s</a> <a href="%s.txt">raw</a></p>' % (ds,d,ds)
            d -= datetime.timedelta(days=1)
        print '</body></html>'
        return
    
    # 指定した日付のログを表示
    m = re.match(r"^/(\d{8})\.(html|txt)",os.environ["PATH_INFO"])
    if m:
        date = m.group(1)
        ext = m.group(2)
        
        try:
            f = open(LOG_FILE%date)
        except:
            print 'Status: 404'
            print 
            print '<html><head><meta charset="utf-8"><title>Not found</title></head>'
            print '<body><p>Not found</p></body></html>'
            return
        
        if ext=="html":
            print 'Content-Type: text/html; charset=utf-8;'
            print 
            print "<!DOCTYPE html>"
            print '<html><head><meta charset="utf-8"><title>Log Viewer</title><style type="text/css">'
            print '*{margin 0;padding 0;line-height:1.5em;}'
            print 'body{background:#c0deed;color:#333;}'
            print 'a{color:#0084b4;text-decoration:none;}'
            print 'a:hover{text-decoration:underline;}'
            print 'div.page{width:640px;margin:auto;}'
            print 'div.item{display:table;width:100%;background:white;margin:0.5em 0.5em 0.5em 1em;border:solid #888 1px;padding:0.2em;}'
            print 'div.image{display:table-cell;width:50px;padding:0.5em; vertical-align:middle;background:#eee;}'
            print 'div.content{display:table-cell;vertical-align:top;padding:0 0 0 0.5em;}'
            print 'div.footer{color:#999;font-size:small;}'
            print 'div.footer a{color:#999;}'
            print 'div.footer a:hover{color:#0084b4;}'
            print 'div.dmto{margin-left:1em;}'
            print 'span.name{color:#999;font-size:small;}'
            print 'span.name a{color:#333;font-size:medium;font-weight:bold;}'
            print 'span.name a:hover{color:#0084b4;}'
            print 'span.event{color:#888;font-weight:bold;}'
            print '</style></head><body><div class="page">'
            t = list(f)
            for l in t[::-1]:
                print tweet2html(l).encode('utf-8')
            print '</div></body></html>'
            
        if ext=="txt":
            print 'Content-Type: text/plain; charset=utf-8;'
            print 
            print f.read()
        
        return
    
    raise Exception("Parameter error")

def tweet2html(t):
    """Convert one log line into an HTML fragment.

    t -- one raw line of the log file: either a line starting with the
         "ERROR" marker or a JSON-encoded stream message.

    Returns the fragment as a string. A line that cannot be parsed or
    rendered does not raise; it yields an error block that shows the
    exception and the offending line, both HTML-escaped.
    """
    from xml.sax.saxutils import escape

    if t.startswith('ERROR'):
        return '<div class="item">ERROR</div>'

    try:
        return tweet2html_sub(json.loads(t))
    except Exception as e:
        # Decode raw bytes so the caller's .encode('utf-8') cannot fail on
        # non-ASCII input, and escape everything: the line is untrusted.
        if isinstance(t, bytes):
            t = t.decode('utf-8', 'replace')
        return u'<div>Error %s</div><div>%s</div>' % (escape(str(e)), escape(t))

def _esc(value):
    """Return value as HTML-safe text; None becomes an empty string."""
    from xml.sax.saxutils import escape
    if value is None:
        return u''
    return escape(u'%s' % (value,), {u'"': u'&quot;'})


def _avatar(user, size):
    """Return the <img> tag for a user's profile image at size x size."""
    return u'<img width=%d height=%d src="%s">' % (size, size, _esc(user["profile_image_url"]))


def _name_span(user):
    """Return the linked screen name followed by the user's display name."""
    return u'<span class="name"><a href="https://twitter.com/%s">%s</a>&nbsp;%s</span>' % (
        user["screen_name"], user["screen_name"], _esc(user["name"]))


def _event_header(label, user, anchor=None):
    """Return the top row of an event box: label, small avatar and name."""
    avatar = _avatar(user, 24)
    if anchor is not None:
        avatar = u'<a name="%s">%s</a>' % (anchor, avatar)
    return u'<div><span class="event">%s</span>&nbsp;%s&nbsp;%s&nbsp;</div>' % (
        label, avatar, _name_span(user))


def _card(image_html, body_html):
    """Return an item box with an image cell and a content cell."""
    return u'<div class="item"><div class="image">%s</div><div class="content">%s</div></div>' % (
        image_html, body_html)


def _footer(created_at):
    """Return the footer row showing only the local timestamp."""
    return u'<div class="footer">%s</div>' % time2local(created_at)


def _linkify(tw):
    """Return the tweet text with hashtags, mentions and URLs made links."""
    text = tw["text"]
    entities = tw.get("entities")
    if not entities:
        return text
    links = []
    for x in entities.get("hashtags", []):
        links.append((x["indices"], "https://twitter.com/search?q=%23" + x["text"]))
    for x in entities.get("user_mentions", []):
        links.append((x["indices"], "https://twitter.com/" + x["screen_name"]))
    # TODO: the official site renders <a href="url">expanded_url</a>
    for x in entities.get("urls", []):
        links.append((x["indices"], x.get("expanded_url") or x["url"]))
    # Replace from the end of the text backwards so that the indices of the
    # entities not yet processed stay valid.
    for (start, end), href in reversed(sorted(links)):
        text = text[:start] + u'<a href="%s">%s</a>' % (_esc(href), text[start:end]) + text[end:]
    return text


def _status_footer(tw):
    """Return the footer row of an ordinary tweet."""
    u = tw["user"]
    parts = [
        u'<a href="https://twitter.com/%s/status/%s">%s</a>&nbsp;' % (
            u["screen_name"], tw["id"], time2local(tw["created_at"])),
        # "source" is inserted as delivered, as in the original code;
        # assumes it already is an HTML link to the client - TODO confirm.
        u'%sから&nbsp;' % tw["source"],
    ]
    reply_id = tw.get("in_reply_to_status_id")
    if reply_id is not None:
        reply_name = tw.get("in_reply_to_screen_name")
        parts.append(u'<a href="#%s">%s宛</a>' % (reply_id, reply_name))
        parts.append(u'<a href="https://twitter.com/%s/status/%s">tw</a>&nbsp;' % (reply_name, reply_id))
    retweet_count = tw.get("retweet_count", 0)
    if retweet_count:
        # The count is shown incremented by one unless it is the capped
        # string "100+"; this mirrors the original behaviour.
        if retweet_count != "100+":
            retweet_count += 1
        parts.append(u'%s回リツイート&nbsp;' % retweet_count)
    geo = tw.get("geo")
    if geo is not None and "coordinates" in geo:
        c = geo["coordinates"]
        parts.append(u'<a href="https://maps.google.com/maps?q=%s,%s">ここ▼</a>から&nbsp;' % (c[0], c[1]))
    return u'<div class="footer">%s</div>' % u''.join(parts)


def tweet2html_sub(tw):
    """Render one decoded UserStream message as an HTML fragment.

    tw -- the message as decoded from JSON. The kind of message is
          recognised from the keys present: "friends", "retweeted_status",
          "event", "direct_message", "delete"; anything else is treated as
          an ordinary tweet.

    Returns the fragment, or an empty string for the initial friends list.
    Missing required keys raise (KeyError etc.); the caller is expected to
    handle that. User names and descriptions are HTML-escaped.
    """
    if "friends" in tw:
        # Friend list sent when the UserStream starts; nothing to show
        return ''

    if "retweeted_status" in tw:
        # Retweet
        u = tw["user"]
        return u''.join([
            u'<div class="item">',
            _event_header('RETWEET', u, anchor=tw["id"]),
            tweet2html_sub(tw["retweeted_status"]),
            u'<div class="footer"><a href="https://twitter.com/%s/status/%s">%s</a> %sから</div>' % (
                u["screen_name"], tw["id"], time2local(tw["created_at"]), tw["source"]),
            u'</div>',
        ])

    event = tw.get("event")

    if event in ("follow", "user_update", "block", "unblock"):
        # Follow / user update / block / unblock
        tr = tw["target"]
        body = u'<div>%s</div><div>%s</div>' % (_name_span(tr), _esc(tr["description"]))
        return u''.join([
            u'<div class="item">',
            _event_header(event.upper(), tw["source"]),
            _card(_avatar(tr, 48), body),
            _footer(tw["created_at"]),
            u'</div>',
        ])

    if event in ("favorite", "unfavorite"):
        # Favorite / unfavorite
        return u''.join([
            u'<div class="item">',
            _event_header(event.upper(), tw["source"]),
            tweet2html_sub(tw["target_object"]),
            _footer(tw["created_at"]),
            u'</div>',
        ])

    if event in ("list_member_added", "list_member_removed"):
        # Added to / removed from a list
        tr = tw["target_object"]
        # Strip leading slashes from the list URI so the link does not
        # contain a double slash after the host name.
        body = u'<div><a href="https://twitter.com/%s">%s</a></div><div>%s</div>' % (
            tr["uri"].lstrip("/"), _esc(tr["full_name"]), _esc(tr["description"]))
        return u''.join([
            u'<div class="item">',
            _event_header(event.upper().replace("_", " "), tw["source"]),
            _card(_avatar(tr["user"], 48), body),
            _footer(tw["created_at"]),
            u'</div>',
        ])

    if "direct_message" in tw:
        # Direct message
        dm = tw["direct_message"]
        rec = dm["recipient"]
        send = dm["sender"]
        # NOTE(review): the message text is inserted as delivered, as in the
        # original code; assumes the API already entity-escapes it - confirm.
        body = u'<div>%s</div><div>%s</div>%s' % (
            _name_span(send), dm["text"], _footer(dm["created_at"]))
        return u''.join([
            u'<div class="item">',
            u'<div><span class="event">DIRECT MESSAGE</span></div>',
            u'<div class="dmto">TO:&nbsp;%s&nbsp;%s&nbsp;</div>' % (_avatar(rec, 24), _name_span(rec)),
            _card(_avatar(send, 48), body),
            u'</div>',
        ])

    if "delete" in tw:
        # Deletion notice
        d = tw["delete"]["status"]
        return (
            '<div class="item">'
            '<div><span class="event">DELETE</span>&nbsp;UserID: %s</div>'
            '<div class="item">StatusID: <a href="#%s">%s</a></div>'
            '</div>' % (d["user_id"], d["id"], d["id"])
        )

    # Ordinary tweet
    # NOTE(review): the tweet text is inserted as delivered, as in the
    # original code; assumes the API already entity-escapes it - confirm.
    u = tw["user"]
    image = u'<a name="%s">%s</a>' % (tw["id"], _avatar(u, 48))
    body = u'<div>%s</div><div>%s</div>%s' % (_name_span(u), _linkify(tw), _status_footer(tw))
    return _card(image, body)

def time2local(t, offset_hours=9):
    """Convert a ``created_at`` timestamp to a local-time string.

    t            -- timestamp such as "Thu Mar 31 15:30:00 +0000 2011"; the
                    zone must be the literal "+0000".
    offset_hours -- hours added to the parsed time. Defaults to 9, the
                    offset that used to be hard-coded (UTC+9).

    Returns the shifted time formatted as "YYYY-MM-DD HH:MM:SS".
    Raises ValueError if t does not match the expected format.
    """
    utc = datetime.datetime.strptime(t,"%a %b %d %H:%M:%S +0000 %Y")
    return str(utc + datetime.timedelta(hours=offset_hours))
        
# Produce the CGI response only when this file is executed as a script
if __name__=="__main__":
    main()
  • 2011/03/31 受信が止まるけど例外が発生せず再接続できない時があるので、read(1)が空文字列を返したらエラーにしてみる
  • 2011/03/29 unblockに対応