I’m trying to modify the code of naive bayes classifier provided by the excellent book Programming Collective Intelligence, adapting it to the GAE datastore (the provided code uses pysqlite2). But trying to do it, I’m encountering in this line:
update.count = count + 1
from this block:
def incf(self,f,cat):
count=self.fcount(f,cat)
if count==0:
fc_value = fc(feature = f, category = cat, count = 1)
fc_value.put()
else:
update = db.GqlQuery("SELECT count FROM fc where feature =:feature AND category =:category", feature = f, category = cat).get()
update.count = count + 1
update.put()
# self.con.execute(
# "update fc set count=%d where feature='%s' and category='%s'"
# % (count+1,f,cat))
This error:
File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 221, in post
sampletrain(nb)
File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 206, in sampletrain
cl.train('Nobody owns the water.','good')
File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 144, in train
self.incf(f,cat)
File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 82, in incf
update.count = count + 1
TypeError: unsupported operand type(s) for +: 'IntegerProperty' and 'int'
And I can not understand: why I cannot increment count with 1?
Why update = db.GqlQuery("SELECT count FROM cc where category =:category", category = cat).get() is an ‘IntegerProperty’ and not an integer?
update.count = count + 1
How can I fix? What is the correct syntax to do this update.count = count + 1 operation?
Here the entire code:
import os
import random
import re
import math
from google.appengine.ext import db
import webapp2
import jinja2
from jinja2 import Environment, FileSystemLoader
jinja_environment = jinja2.Environment(autoescape=True,
loader=jinja2.FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates')))
class fc(db.Model):
feature = db.StringProperty()
category = db.StringProperty()
count = db.IntegerProperty()
class cc(db.Model):
category = db.StringProperty()
count = db.IntegerProperty()
def getfeatures(doc):
splitter=re.compile('\\W*')
# Split the words by non-alpha characters
words=[s.lower() for s in splitter.split(doc)
if len(s)>2 and len(s)<20]
return dict([(w,1) for w in words])
class classifier:
def __init__(self,getfeatures, filename=None):
# Counts of feature/category combinations
self.fc={}
# Counts of documents in each category
self.cc={}
self.getfeatures=getfeatures
# def setdb(self,dbfile):
# self.con=sqlite.connect('db_file')
# self.con=sqlite3.connect(":memory:")
# self.con.execute('create table if not exists fc(feature,category,count)')
# self.con.execute('create table if not exists cc(category,count)')
def incf(self,f,cat):
count=self.fcount(f,cat)
if count==0:
fc_value = fc(feature = f, category = cat, count = 1)
fc_value.put()
else:
update = db.GqlQuery("SELECT count FROM fc where feature =:feature AND category =:category", feature = f, category = cat).get()
update.count = count + 1
update.put()
# self.con.execute(
# "update fc set count=%d where feature='%s' and category='%s'"
# % (count+1,f,cat))
def fcount(self,f,cat):
res = db.GqlQuery("SELECT * FROM fc WHERE feature =:feature AND category =:category", feature = f, category = cat).get()
logging.debug('This is a log message.')
# res=self.con.execute(
# 'select count from fc where feature="%s" and category="%s"'
# %(f,cat)).fetchone()
if res is None: return 0
else:
res = fc.count
return res
# return float(res[0])
def incc(self,cat):
count=self.catcount(cat)
if count==0:
# self.con.execute("insert into cc values ('%s',1)" % (cat))
cc_value = cc(category = cat, count = 1)
cc_value.put()
else:
update = db.GqlQuery("SELECT count FROM cc where category =:category", category = cat).get()
update.count = count + 1
update.put()
# self.con.execute("update cc set count=%d where category='%s'"
# % (count+1,cat))
def catcount(self,cat):
# res=self.con.execute('select count from cc where category="%s"'
# %(cat)).fetchone()
res = db.GqlQuery("SELECT count FROM cc WHERE category =:category", category = cat).get()
if res is None: return 0
# else: return float(res[0])
else: return float(res)
def categories(self):
# cur = self.con.execute('select category from cc');
cur = db.GqlQuery("SELECT category FROM cc").fetch(999)
return [d[0] for d in cur]
def totalcount(self):
# res=self.con.execute('select sum(count) from cc').fetchone();
all_cc = db.GqlQuery("SELECT * FROM cc").fetch(999)
res = 0
for cc in all_cc:
count = cc.count
res+=count
# res = db.GqlQuery("SELECT sum(count) FROM cc").get()
# if res==None: return 0
if res == 0: return 0
# return res[0]
return res
def train(self,item,cat):
features=self.getfeatures(item)
# Increment the count for every feature with this category
for f in features.keys():
## for f in features:
self.incf(f,cat)
# Increment the count for this category
self.incc(cat)
# self.con.commit()
def fprob(self,f,cat):
if self.catcount(cat)==0: return 0
# The total number of times this feature appeared in this
# category divided by the total number of items in this category
return self.fcount(f,cat)/self.catcount(cat)
def weightedprob(self,f,cat,prf,weight=1.0,ap=0.5):
# Calculate current probability
basicprob=prf(f,cat)
# Count the number of times this feature has appeared in
# all categories
totals=sum([self.fcount(f,c) for c in self.categories()])
# Calculate the weighted average
bp=((weight*ap)+(totals*basicprob))/(weight+totals)
return bp
class naivebayes(classifier):
def __init__(self,getfeatures):
classifier.__init__(self, getfeatures)
self.thresholds={}
def docprob(self,item,cat):
features=self.getfeatures(item)
# Multiply the probabilities of all the features together
p=1
for f in features: p*=self.weightedprob(f,cat,self.fprob)
return p
def prob(self,item,cat):
catprob=self.catcount(cat)/self.totalcount()
docprob=self.docprob(item,cat)
return docprob*catprob
def setthreshold(self,cat,t):
self.thresholds[cat]=t
def getthreshold(self,cat):
if cat not in self.thresholds: return 1.0
return self.thresholds[cat]
def classify(self,item,default=None):
probs={}
# Find the category with the highest probability
max=0.0
for cat in self.categories():
probs[cat]=self.prob(item,cat)
if probs[cat]>max:
max=probs[cat]
best=cat
# Make sure the probability exceeds threshold*next best
for cat in probs:
if cat==best: continue
if probs[cat]*self.getthreshold(best)>probs[best]: return default
return best
def sampletrain(cl):
cl.train('Nobody owns the water.','good')
cl.train('the quick rabbit jumps fences','good')
cl.train('buy pharmaceuticals now','bad')
cl.train('make quick money at the online casino','bad')
cl.train('the quick brown fox jumps','good')
class MainHandler(webapp2.RequestHandler):
def get(self):
template_values = {"given_sentence":'put a name here'}
template = jinja_environment.get_template('index.html')
self.response.out.write(template.render(template_values))
def post(self):
nb = naivebayes(getfeatures)
sampletrain(nb)
given_sentence = self.request.get("given_sentence")
spam_result = nb.classify(given_sentence)
submit_button = self.request.get("submit_button")
if submit_button:
self.redirect('/test_result?spam_result=%s&given_sentence=%s' % (spam_result, given_sentence))
class test_resultHandler(webapp2.RequestHandler):
def get(self):
spam_result = self.request.get("spam_result")
given_sentence = self.request.get("given_sentence")
test_result_values = {"spam_result": spam_result,
"given_sentence": given_sentence}
template = jinja_environment.get_template('test_result.html')
self.response.out.write(template.render(test_result_values))
app = webapp2.WSGIApplication([('/', MainHandler), ('/test_result', test_resultHandler)],
debug=True)
“Properties” are typically meant to be accessed on object instances, not on classes. Try changing:
To something like:
(Not that this is a great design, but I think it will fix the problem you’re having — namely that property objects only get special behavior when they’re accessed via a class isntance, and not via the class itself.)