I am a PHP developer exploring the outside world. I have decided to start learning Python.
The below script is my first attempt at porting a PHP script to Python. Its job is to take tweets from a Redis store. The tweets are coming from Twitter’s Streaming API and stored as JSON objects. Then the information needed is extracted and dumped into a CSV file to be imported into MySQL using the LOAD DATA LOCAL INFILE that is hosted on a different server.
So, the question is: Now that I have my first Python script running, how can I make it more Pythonic? Are there any suggestions that you guys have? Make it better? Tricks I should know about? Constructive Criticism?
Update: Having taken everyone’s suggestions thus far, here is the updated version:
Update2: Ran the code through pylint. Now scores a 9.89/10. Any other suggestions?
# -*- coding: utf-8 -*-
"""Redis IO Loop for Tweelay Bot"""
from __future__ import with_statement
import simplejson
import re
import datetime
import time
import csv
import hashlib
# Bot Modules
import tweelay.red as red
import tweelay.upload as upload
import tweelay.openanything as openanything
__version__ = "4"
def process_tweets():
"""Processes 0-20 tweets from Redis store"""
data = []
last_id = 0
for i in range(20):
last = red.pop_tweet()
if not last:
break
t = TweetHandler(last)
t.cleanup()
t.extract()
if t.get_tweet_id() == last_id:
break
tweet = t.proc()
if tweet:
data = data + [tweet]
last_id = t.get_tweet_id()
time.sleep(0.01)
if not data:
return False
ch = CSVHandler(data)
ch.pack_csv()
ch.uploadr()
source = "http://bot.tweelay.net/tweets.php"
openanything.openAnything(
source,
etag=None,
lastmodified=None,
agent="Tweelay/%s (Redis)" % __version__
)
class TweetHandler:
"""Cleans, Builds and returns needed data from Tweet"""
def __init__(self, json):
self.json = json
self.tweet = None
self.tweet_id = 0
self.j = None
def cleanup(self):
"""Takes JSON encoded tweet and cleans it up for processing"""
self.tweet = unicode(self.json, "utf-8")
self.tweet = re.sub('^s:[0-9]+:["]+', '', self.tweet)
self.tweet = re.sub('\n["]+;$', '', self.tweet)
def extract(self):
"""Takes cleaned up JSON encoded tweet and extracts the datas we need"""
self.j = simplejson.loads(self.tweet)
def proc(self):
"""Builds the datas from the JSON object"""
try:
return self.build()
except KeyError:
if 'delete' in self.j:
return None
else:
print ";".join(["%s=%s" % (k, v) for k, v in self.j.items()])
return None
def build(self):
"""Builds tuple from JSON tweet"""
return (
self.j['user']['id'],
self.j['user']['screen_name'].encode('utf-8'),
self.j['text'].encode('utf-8'),
self.j['id'],
self.j['in_reply_to_status_id'],
self.j['in_reply_to_user_id'],
self.j['created_at'],
__version__ )
def get_tweet_id(self):
"""Return Tweet ID"""
if 'id' in self.j:
return self.j['id']
if 'delete' in self.j:
return self.j['delete']['status']['id']
class CSVHandler:
"""Takes list of tweets and saves them to a CSV
file to be inserted into MySQL data store"""
def __init__(self, data):
self.data = data
self.file_name = self.gen_file_name()
def gen_file_name(self):
"""Generate unique file name"""
now = datetime.datetime.now()
hashr = hashlib.sha1()
hashr.update(str(now))
hashr.update(str(len(self.data)))
hash_str = hashr.hexdigest()
return hash_str+'.csv'
def pack_csv(self):
"""Save tweet data to CSV file"""
with open('tmp/'+self.file_name, mode='ab') as ofile:
writer = csv.writer(
ofile, delimiter=',',
quotechar='"',
quoting=csv.QUOTE_MINIMAL)
writer.writerows(self.data)
def uploadr(self):
"""Upload file to remote host"""
url = "http://example.com/up.php?filename="+self.file_name
uploadr = upload.upload_file(url, 'tmp/'+self.file_name)
if uploadr[0] == 200:
print "Upload: 200 - ("+str(len(self.data))+")", self.file_name
print "-------"
#os.remove('tmp/'+self.file_name)
else:
print "Upload Error:", uploadr[0]
if __name__ == "__main__":
while True:
process_tweets()
time.sleep(1)
Instead of:
code:
Same semantics, more compact and Pythonic.
Instead of
do just
since
Noneis false-ish anyway (sonot lastisTruewhenlastisNone). In general, when you want to check if something isNone, codeis None, not== None`.In
lose the redundant parentheses and the obsolete
<>operator and code insteadand also remove the redundant parentheses from other
ifstatements.Instead of:
code:
since any empty container is false-ish.
In
code instead
since the method already returns a string, making the
strcall redundant.Instead of:
use
which does the loop on your behalf.
Instead of
use (in Python 2.6 or better, or in 2.5 by starting the module with
to “import from the future” the
withstatement feature):which guarantees to do the close for you (including in cases where an exception might be raised).
Instead of
use
and similarly for other
printstatements — the comma inserts a space for you.I’m sure there are more such little things, but these are a few that “jumped to the eye” as I was scanning your code.