When I run the following code, I keep getting this error: Traceback (most recent

Question

0

Asked: May 27, 20262026-05-27T17:31:34+00:00 2026-05-27T17:31:34+00:00

When I run the following code, I keep getting this error: Traceback (most recent

0

When I run the following code, I keep getting this error:

Traceback (most recent call last):
  File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 94, in <module>
    main()
  File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 71, in main
    for final_url in pool.imap(handle_listing, listings):
  File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenpool.py", line 232, in next
    val = self.waiters.get().wait()
  File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 166, in wait
    return self._exit_event.wait()
  File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\event.py", line 120, in wait
    current.throw(*self._exc)
  File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 192, in main
    result = function(*args, **kwargs)
  File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 35, in handle_listing
    title, = TITLE_MATCH.match(listing_title).groups()
AttributeError: 'NoneType' object has no attribute 'groups'

What is wrong?

It has something to do with the Title match but I don’t know how to fix it!

If you could help me I would really appreciate it!

Thanks!

from gzip import GzipFile
from cStringIO import StringIO
import re
import webbrowser
import time
from difflib import SequenceMatcher
import os
import sys
from BeautifulSoup import BeautifulSoup
import eventlet
from eventlet.green import urllib2
import urllib2
import urllib

def download(url):
    print "Downloading:", url
    s = urllib2.urlopen(url).read()
    if s[:2] == '\x1f\x8b':
        ifh = GzipFile(mode='rb', fileobj=StringIO(s))
        s = ifh.read()
    print "Downloaded: ", url
    return s

def replace_chars(text, replacements):
    return ''.join(replacements.get(x,x) for x in text)

def handle_listing(listing_url):
    listing_document = BeautifulSoup(download(listing_url))

    # ignore pages that link to yellowpages
    if not listing_document.find("a", href=re.compile(re.escape("http://www.yellowpages.com/") + ".*")):
        listing_title = listing_document.title.text
        reps = {' ':'-', ',':'', '\'':'', '[':'', ']':''}
        title, = TITLE_MATCH.match(listing_title).groups()
        address, = ADDRESS_MATCH.match(listing_title).groups()

        yellow_page_url = "http://www.yellowpages.com/%s/%s?order=distance" % (
            replace_chars(address, reps),
            replace_chars(title, reps),
        )

        yellow_page = BeautifulSoup(download(yellow_page_url))

        page_url = yellow_page.find("h3", {"class" : "business-name fn org"})
        if page_url:
            page_url = page_url.a["href"]

            business_name = title[:title.index(",")]

            page = BeautifulSoup(download(page_url))
            yellow_page_address =  page.find("span", {"class" : "street-address"})
            if yellow_page_address:

                if SequenceMatcher(None, address, yellow_page_address.text).ratio() >= 0.5:
                    pid, = re.search(r'p(\d{5,20})\.jsp', listing_url).groups(0)
                    page_escaped = replace_chars(page_url, {':':'%3A', '/':'%2F', '?':'%3F', '=':'%3D'})

                    final_url = "http://www.locationary.com/access/proxy.jsp?ACTION_TOKEN=proxy_jsp$JspView$SaveAction&inPlaceID=%s&xxx_c_1_f_987=%s" % (
                            pid, page_escaped)
                    return final_url


def main():

    pool = eventlet.GreenPool()
    listings_document = BeautifulSoup(download(START_URL))
    listings = listings_document.findAll("a", href = LOCATION_LISTING)
    listings = [listing['href'] for listing in listings]

    for final_url in pool.imap(handle_listing, listings):
        print final_url


        if str(final_url) is not None:

            url = str(final_url)

            req = urllib2.Request(url)
            response = urllib2.urlopen(req)
            page = response.read()
            time.sleep(2)



for a in range(2,3):

    START_URL = 'http://www.locationary.com/place/en/US/New_Jersey/Randolph-page' + str(a) + '/?ACTION_TOKEN=NumericAction'
    TITLE_MATCH = re.compile(r'(.*) \(\d{1,10}.{1,100}\)$')
    ADDRESS_MATCH = re.compile(r'.{1,100}\((.*), .{4,14}, United States\)$')
    LOCATION_LISTING = re.compile(r'http://www\.locationary\.com/place/en/US/.{1,50}/.{1,50}/.{1,100}\.jsp')

    if __name__ == '__main__':
        main()

Report

Leave an answer
Cancel reply

You must login to add an answer.

Need An Account,

1 Answer

Editorial Team · Answer 1 · 2026-05-27T17:31:34+00:00

Editorial Team

2026-05-27T17:31:34+00:00Added an answer on May 27, 2026 at 5:31 pm

Quoting from your error:

title, = TITLE_MATCH.match(listing_title).groups()
AttributeError: ‘NoneType’ object has no attribute ‘groups’

TITLE_MATCH.match(listing_title) returns None, so you can’t call .groups().

0

Reply
Share
Share

- Report

Sign Up

Sign In

Forgot Password

The Archive Base Latest Questions

When I run the following code, I keep getting this error: Traceback (most recent

Leave an answerCancel reply

1 Answer

Leave an answer
Cancel reply