{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "### Get rent data from trulia.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import os\n", "import requests\n", "from bs4 import BeautifulSoup as bs\n", "import csv\n", "import time" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [], "source": [ "class Property():\n", "\tdef __init__(self,xml):\n", "\t\tself.latitude = str(find_by_itemprop(xml,'latitude'))\n", "\t\tself.longitude = str(find_by_itemprop(xml,'longitude'))\n", "\t\tself.address = str(find_by_itemprop_text(xml,'streetAddress'))\n", "\t\tself.zip = str(find_by_itemprop_text(xml,'postalCode'))\n", "\t\tself.city = str(find_by_itemprop_text(xml,'addressLocality'))\n", "\t\tself.fullAddress = str(self.address + ' '+ self.city +' '+ self.zip).replace(',','')\n", "\t\tif xml.find(class_ = 'listBordered mbn') is not None:\n", "\t\t\tself.construct_big(xml)\n", "\t\telse:\n", "\t\t\tself.construct_simple(xml)\n", "\n", "\tdef construct_simple(self,xml):\n", "\t\tself.type = 'single'\n", "\t\ttry:\n", "\t\t self.price = str((xml.find(class_ = 'lastCol').find_all('span')[1].text.replace('$','').replace(',','')))\n", "\t\texcept UnicodeEncodeError:\n", "\t\t self.price = None\n", "\t\ttry:\n", "\t\t self.bedrooms = str(((xml.find(class_ = 'cols3').find('small').text.split(' ')[0])))\n", "\t\texcept ValueError:\n", "\t\t self.bedrooms = None\n", "\t\texcept AttributeError:\n", "\t\t self.bedrooms = None\n", "\t\ttry:\n", " \t\t self.bathrooms = str((xml.find(class_ = 'cols3').find_all('small')[1].text.split(' ')[0]))\n", " except AttributeError:\n", " self.bathrooms = None\t\t\n", "\t\ttry:\n", "\t\t\tself.sqft = str((xml.find(class_ = 'cols4').find_all('small')[1].text.split(' ')[0]))\n", "\t\texcept IndexError:\n", "\t\t\tself.sqft = None\n", "\t\t\n", "\tdef construct_big(self,xml):\n", "\t\tself.type = 'apartment'\n", "\t\tself.units = []\n", "\t\tfor apt in xml.find_all('li',class_ = 'pvs mvn pll')[0:-1]:\n", "\t\t\tself.units.append(Apartment(apt))\n", "\t\t \n", "\tdef output(self):\n", "\t\tif self.type == 'single':\n", "\t\t\treturn [[self.latitude,self.longitude,writeNone(self.fullAddress),writeNone(self.zip), writeNone(self.price),writeNone(self.bedrooms),writeNone(self.bathrooms),writeNone(self.sqft), (time.strftime(\"%d/%m/%Y\"))]]\n", "\t\telse:\n", "\t\t\treturn [([self.latitude,self.longitude,writeNone(self.fullAddress),writeNone(self.zip), writeNone(apt.price), writeNone(apt.roomType), writeNone(apt.bathrooms),writeNone(apt.sqft), (time.strftime(\"%d/%m/%Y\"))]) for apt in self.units]\n", " \n", "class Apartment():\n", "\tdef __init__(self,xml):\n", "\t\t self.roomType = str(xml.find(class_ = 'txtL col cols7').text.replace(' ','')).splitlines()[1]\n", "\t\t self.bathrooms = str(xml.find(class_= 'txtC col cols4').text.split(' ')[0].rstrip())\n", "\t\t try:\n", "\t\t self.sqft= str(xml.find_all(class_= 'txtC col cols6')[0].text.replace(' ','').replace('+','').replace('sqft','')).splitlines()[1]\n", "\t\t except TypeError:\n", "\t\t self.sqft = None\n", "\t\t try:\n", "\t\t self.price = str(xml.find_all(class_= 'txtC col cols6')[1].text.replace('$','').replace('+/mo','').replace('/mo','').replace('+','').rstrip().replace(' ','')).splitlines()[1]\n", "\t\t except TypeError:\n", "\t\t self.price = None\n", "\t\t except IndexError: \n", "\t\t self.price = None\n", "\t\t \n", "\n", "def writeNone(val):\n", "\tif val is None:\n", "\t\treturn ''\n", "\telse:\n", "\t\treturn val\n", "\t\n", "def find_by_itemprop(xml,prop):\n", "\tmytag = xml.find(attrs={'itemprop' : prop})\n", "\ttry:\n", "\t return mytag.get('content')\n", "\texcept AttributeError:\n", "\t return ''\n", "\t \n", "def find_by_itemprop_text(xml,prop):\n", " mytag = xml.find(attrs={'itemprop' : prop})\n", " try:\n", " return mytag.text\n", " except AttributeError:\n", " return ''\n", "def scrape_zips(zips):\n", " #create a blank list with all of the Property Objects for parsed homes\n", " #create a list for the CSV outputs\n", " start_time = time.time()\n", " #main()\n", " csv_output =[]\n", " parsed_homes = []\n", " file_name = zips[-1]\n", " for z in zips:\n", " base_page = 'http://www.trulia.com/for_rent/'+str(z)+'_zip/'\n", " soup = bs(requests.get(base_page).text,'lxml')\n", "\n", " #create list of pages to scrape\n", " pages = [base_page]\n", " #create soup of area to look for number of pages\n", " pages_area = soup.find_all(class_='srpPagination_list')\n", " #create variable for number of pages\n", " try:\n", " number_of_pages= int(bs(str(pages_area)).find_all('a')[-1].text)\n", " #loop over the number of pages to create a list with all of the urls\n", " for i in range(2,number_of_pages+1):\n", " pages.append(base_page + str(i)+'_p')\n", " except IndexError:\n", " number_of_pages = 1\n", " print('you are scraping ' + str(number_of_pages)+ ' pages and approximately ' + str(number_of_pages*30) + ' listings for zip: '+ str(z))\n", " #tracker to see which house we're on.\n", " listing_comp= 0\n", " #pageinate through all of the pages and append each listing to the CSV file\n", " print(\"--- %s seconds ---\" % (time.time() - start_time))\n", " for page in pages:\n", " soup = bs(requests.get(page).text,'lxml')\n", " mylist = soup.find_all(class_='property-data-elem')\n", " #if my list is zero, break!!!!!!!!!!!!!!\n", " listing_comp +=30\n", " print('listings completed :' + str(listing_comp) + '/' + str(number_of_pages*30))\n", " for home in mylist:\n", " GlobalHome.append(home)\n", " parsed_homes.append(Property(home))\n", " csv_output += Property(home).output()\n", "\n", " #set the working directory to the desktop\n", " try:\n", " os.chdir('data')\n", " except OSError or WindowsError:\n", " pass\n", " \n", " #save a copy of this scrape\n", " with open('data'+z+'_'+((time.strftime(\"%d/%m/%Y\").replace('/',' ')))+'.csv','wb') as stored_file:\n", "\twr = csv.writer(stored_file)\n", "\twr.writerows(csv_output)\n", " #append this scrape to the master database\n", " with open('core.csv','a') as core_file:\n", "\twrC = csv.writer(core_file)\n", "\twrC.writerows(csv_output)\n", " #duration_time = (time.time() - start_time())\n", " #number_of_listings = number_of_pages*30\n", " #listings_per_second = number_of_listings%duration_time\n", " print(\"--- %s seconds ---\" % (time.time() - start_time))\n", " return (\"--- %s seconds ---\" % (time.time() - start_time))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "using the zip of Boston, it parsees the such data as zip, address, number of bed room and bath room, and price, and save them as a csv file in the data folder" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "69\n", "you are scraping 1 pages and approximately 30 listings for zip: 02101\n", "--- 0.990999937057 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02102\n", "--- 2.68799996376 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02103\n", "--- 4.54999995232 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02104\n", "--- 6.507999897 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02105\n", "--- 9.02600002289 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02106\n", "--- 11.0230000019 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02107\n", "--- 13.0369999409 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02108\n", "--- 15.0639998913 seconds ---\n", "listings completed :30/30\n", "you are scraping 5 pages and approximately 150 listings for zip: 02109\n", "--- 18.0729999542 seconds ---\n", "listings completed :30/150\n", "listings completed :60/150\n", "listings completed :90/150\n", "listings completed :120/150\n", "listings completed :150/150\n", "you are scraping 4 pages and approximately 120 listings for zip: 02110\n", "--- 28.6610000134 seconds ---\n", "listings completed :30/120\n", "listings completed :60/120\n", "listings completed :90/120\n", "listings completed :120/120\n", "you are scraping 1 pages and approximately 30 listings for zip: 02111\n", "--- 36.8899998665 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02112\n", "--- 39.6050000191 seconds ---\n", "listings completed :30/30\n", "you are scraping 17 pages and approximately 510 listings for zip: 02113\n", "--- 42.0709998608 seconds ---\n", "listings completed :30/510\n", "listings completed :60/510\n", "listings completed :90/510\n", "listings completed :120/510\n", "listings completed :150/510\n", "listings completed :180/510\n", "listings completed :210/510\n", "listings completed :240/510\n", "listings completed :270/510\n", "listings completed :300/510\n", "listings completed :330/510\n", "listings completed :360/510\n", "listings completed :390/510\n", "listings completed :420/510\n", "listings completed :450/510\n", "listings completed :480/510\n", "listings completed :510/510\n", "you are scraping 16 pages and approximately 480 listings for zip: 02114\n", "--- 73.3629999161 seconds ---\n", "listings completed :30/480\n", "listings completed :60/480\n", "listings completed :90/480\n", "listings completed :120/480\n", "listings completed :150/480\n", "listings completed :180/480\n", "listings completed :210/480\n", "listings completed :240/480\n", "listings completed :270/480\n", "listings completed :300/480\n", "listings completed :330/480\n", "listings completed :360/480\n", "listings completed :390/480\n", "listings completed :420/480\n", "listings completed :450/480\n", "listings completed :480/480\n", "you are scraping 37 pages and approximately 1110 listings for zip: 02115\n", "--- 104.29399991 seconds ---\n", "listings completed :30/1110\n", "listings completed :60/1110\n", "listings completed :90/1110\n", "listings completed :120/1110\n", "listings completed :150/1110\n", "listings completed :180/1110\n", "listings completed :210/1110\n", "listings completed :240/1110\n", "listings completed :270/1110\n", "listings completed :300/1110\n", "listings completed :330/1110\n", "listings completed :360/1110\n", "listings completed :390/1110\n", "listings completed :420/1110\n", "listings completed :450/1110\n", "listings completed :480/1110\n", "listings completed :510/1110\n", "listings completed :540/1110\n", "listings completed :570/1110\n", "listings completed :600/1110\n", "listings completed :630/1110\n", "listings completed :660/1110\n", "listings completed :690/1110\n", "listings completed :720/1110\n", "listings completed :750/1110\n", "listings completed :780/1110\n", "listings completed :810/1110\n", "listings completed :840/1110\n", "listings completed :870/1110\n", "listings completed :900/1110\n", "listings completed :930/1110\n", "listings completed :960/1110\n", "listings completed :990/1110\n", "listings completed :1020/1110\n", "listings completed :1050/1110\n", "listings completed :1080/1110\n", "listings completed :1110/1110\n", "you are scraping 32 pages and approximately 960 listings for zip: 02116\n", "--- 172.093999863 seconds ---\n", "listings completed :30/960\n", "listings completed :60/960\n", "listings completed :90/960\n", "listings completed :120/960\n", "listings completed :150/960\n", "listings completed :180/960\n", "listings completed :210/960\n", "listings completed :240/960\n", "listings completed :270/960\n", "listings completed :300/960\n", "listings completed :330/960\n", "listings completed :360/960\n", "listings completed :390/960\n", "listings completed :420/960\n", "listings completed :450/960\n", "listings completed :480/960\n", "listings completed :510/960\n", "listings completed :540/960\n", "listings completed :570/960\n", "listings completed :600/960\n", "listings completed :630/960\n", "listings completed :660/960\n", "listings completed :690/960\n", "listings completed :720/960\n", "listings completed :750/960\n", "listings completed :780/960\n", "listings completed :810/960\n", "listings completed :840/960\n", "listings completed :870/960\n", "listings completed :900/960\n", "listings completed :930/960\n", "listings completed :960/960\n", "you are scraping 1 pages and approximately 30 listings for zip: 02117\n", "--- 238.79399991 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02118\n", "--- 240.947000027 seconds ---\n", "listings completed :30/30\n", "you are scraping 8 pages and approximately 240 listings for zip: 02119\n", "--- 244.791999817 seconds ---\n", "listings completed :30/240\n", "listings completed :60/240\n", "listings completed :90/240\n", "listings completed :120/240\n", "listings completed :150/240\n", "listings completed :180/240\n", "listings completed :210/240\n", "listings completed :240/240\n", "you are scraping 36 pages and approximately 1080 listings for zip: 02120\n", "--- 259.669999838 seconds ---\n", "listings completed :30/1080\n", "listings completed :60/1080\n", "listings completed :90/1080\n", "listings completed :120/1080\n", "listings completed :150/1080\n", "listings completed :180/1080\n", "listings completed :210/1080\n", "listings completed :240/1080\n", "listings completed :270/1080\n", "listings completed :300/1080\n", "listings completed :330/1080\n", "listings completed :360/1080\n", "listings completed :390/1080\n", "listings completed :420/1080\n", "listings completed :450/1080\n", "listings completed :480/1080\n", "listings completed :510/1080\n", "listings completed :540/1080\n", "listings completed :570/1080\n", "listings completed :600/1080\n", "listings completed :630/1080\n", "listings completed :660/1080\n", "listings completed :690/1080\n", "listings completed :720/1080\n", "listings completed :750/1080\n", "listings completed :780/1080\n", "listings completed :810/1080\n", "listings completed :840/1080\n", "listings completed :870/1080\n", "listings completed :900/1080\n", "listings completed :930/1080\n", "listings completed :960/1080\n", "listings completed :990/1080\n", "listings completed :1020/1080\n", "listings completed :1050/1080\n", "listings completed :1080/1080\n", "you are scraping 2 pages and approximately 60 listings for zip: 02121\n", "--- 329.139999866 seconds ---\n", "listings completed :30/60\n", "listings completed :60/60\n", "you are scraping 3 pages and approximately 90 listings for zip: 02122\n", "--- 333.612999916 seconds ---\n", "listings completed :30/90\n", "listings completed :60/90\n", "listings completed :90/90\n", "you are scraping 1 pages and approximately 30 listings for zip: 02123\n", "--- 341.090999842 seconds ---\n", "listings completed :30/30\n", "you are scraping 4 pages and approximately 120 listings for zip: 02124\n", "--- 343.842999935 seconds ---\n", "listings completed :30/120\n", "listings completed :60/120\n", "listings completed :90/120\n", "listings completed :120/120\n", "you are scraping 5 pages and approximately 150 listings for zip: 02125\n", "--- 351.847999811 seconds ---\n", "listings completed :30/150\n", "listings completed :60/150\n", "listings completed :90/150\n", "listings completed :120/150\n", "listings completed :150/150\n", "you are scraping 1 pages and approximately 30 listings for zip: 02126\n", "--- 363.184000015 seconds ---\n", "listings completed :30/30\n", "you are scraping 22 pages and approximately 660 listings for zip: 02127\n", "--- 366.396999836 seconds ---\n", "listings completed :30/660\n", "listings completed :60/660\n", "listings completed :90/660\n", "listings completed :120/660\n", "listings completed :150/660\n", "listings completed :180/660\n", "listings completed :210/660\n", "listings completed :240/660\n", "listings completed :270/660\n", "listings completed :300/660\n", "listings completed :330/660\n", "listings completed :360/660\n", "listings completed :390/660\n", "listings completed :420/660\n", "listings completed :450/660\n", "listings completed :480/660\n", "listings completed :510/660\n", "listings completed :540/660\n", "listings completed :570/660\n", "listings completed :600/660\n", "listings completed :630/660\n", "listings completed :660/660\n", "you are scraping 1 pages and approximately 30 listings for zip: 02128\n", "--- 405.738999844 seconds ---\n", "listings completed :30/30\n", "you are scraping 8 pages and approximately 240 listings for zip: 02129\n", "--- 409.197000027 seconds ---\n", "listings completed :30/240\n", "listings completed :60/240\n", "listings completed :90/240\n", "listings completed :120/240\n", "listings completed :150/240\n", "listings completed :180/240\n", "listings completed :210/240\n", "listings completed :240/240\n", "you are scraping 21 pages and approximately 630 listings for zip: 02130\n", "--- 425.365999937 seconds ---\n", "listings completed :30/630\n", "listings completed :60/630\n", "listings completed :90/630\n", "listings completed :120/630\n", "listings completed :150/630\n", "listings completed :180/630\n", "listings completed :210/630\n", "listings completed :240/630\n", "listings completed :270/630\n", "listings completed :300/630\n", "listings completed :330/630\n", "listings completed :360/630\n", "listings completed :390/630\n", "listings completed :420/630\n", "listings completed :450/630\n", "listings completed :480/630\n", "listings completed :510/630\n", "listings completed :540/630\n", "listings completed :570/630\n", "listings completed :600/630\n", "listings completed :630/630\n", "you are scraping 1 pages and approximately 30 listings for zip: 02131\n", "--- 466.40899992 seconds ---\n", "listings completed :30/30\n", "you are scraping 4 pages and approximately 120 listings for zip: 02132\n", "--- 469.606999874 seconds ---\n", "listings completed :30/120\n", "listings completed :60/120\n", "listings completed :90/120\n", "listings completed :120/120\n", "you are scraping 1 pages and approximately 30 listings for zip: 02133\n", "--- 477.25999999 seconds ---\n", "listings completed :30/30\n", "you are scraping 71 pages and approximately 2130 listings for zip: 02134\n", "--- 479.738999844 seconds ---\n", "listings completed :30/2130\n", "listings completed :60/2130\n", "listings completed :90/2130\n", "listings completed :120/2130\n", "listings completed :150/2130\n", "listings completed :180/2130\n", "listings completed :210/2130\n", "listings completed :240/2130\n", "listings completed :270/2130\n", "listings completed :300/2130\n", "listings completed :330/2130\n", "listings completed :360/2130\n", "listings completed :390/2130\n", "listings completed :420/2130\n", "listings completed :450/2130\n", "listings completed :480/2130\n", "listings completed :510/2130\n", "listings completed :540/2130\n", "listings completed :570/2130\n", "listings completed :600/2130\n", "listings completed :630/2130\n", "listings completed :660/2130\n", "listings completed :690/2130\n", "listings completed :720/2130\n", "listings completed :750/2130\n", "listings completed :780/2130\n", "listings completed :810/2130\n", "listings completed :840/2130\n", "listings completed :870/2130\n", "listings completed :900/2130\n", "listings completed :930/2130\n", "listings completed :960/2130\n", "listings completed :990/2130\n", "listings completed :1020/2130\n", "listings completed :1050/2130\n", "listings completed :1080/2130\n", "listings completed :1110/2130\n", "listings completed :1140/2130\n", "listings completed :1170/2130\n", "listings completed :1200/2130\n", "listings completed :1230/2130\n", "listings completed :1260/2130\n", "listings completed :1290/2130\n", "listings completed :1320/2130\n", "listings completed :1350/2130\n", "listings completed :1380/2130\n", "listings completed :1410/2130\n", "listings completed :1440/2130\n", "listings completed :1470/2130\n", "listings completed :1500/2130\n", "listings completed :1530/2130\n", "listings completed :1560/2130\n", "listings completed :1590/2130\n", "listings completed :1620/2130\n", "listings completed :1650/2130\n", "listings completed :1680/2130\n", "listings completed :1710/2130\n", "listings completed :1740/2130\n", "listings completed :1770/2130\n", "listings completed :1800/2130\n", "listings completed :1830/2130\n", "listings completed :1860/2130\n", "listings completed :1890/2130\n", "listings completed :1920/2130\n", "listings completed :1950/2130\n", "listings completed :1980/2130\n", "listings completed :2010/2130\n", "listings completed :2040/2130\n", "listings completed :2070/2130\n", "listings completed :2100/2130\n", "listings completed :2130/2130\n", "you are scraping 105 pages and approximately 3150 listings for zip: 02135\n", "--- 620.020999908 seconds ---\n", "listings completed :30/3150\n", "listings completed :60/3150\n", "listings completed :90/3150\n", "listings completed :120/3150\n", "listings completed :150/3150\n", "listings completed :180/3150\n", "listings completed :210/3150\n", "listings completed :240/3150\n", "listings completed :270/3150\n", "listings completed :300/3150\n", "listings completed :330/3150\n", "listings completed :360/3150\n", "listings completed :390/3150\n", "listings completed :420/3150\n", "listings completed :450/3150\n", "listings completed :480/3150\n", "listings completed :510/3150\n", "listings completed :540/3150\n", "listings completed :570/3150\n", "listings completed :600/3150\n", "listings completed :630/3150\n", "listings completed :660/3150\n", "listings completed :690/3150\n", "listings completed :720/3150\n", "listings completed :750/3150\n", "listings completed :780/3150\n", "listings completed :810/3150\n", "listings completed :840/3150\n", "listings completed :870/3150\n", "listings completed :900/3150\n", "listings completed :930/3150\n", "listings completed :960/3150\n", "listings completed :990/3150\n", "listings completed :1020/3150\n", "listings completed :1050/3150\n", "listings completed :1080/3150\n", "listings completed :1110/3150\n", "listings completed :1140/3150\n", "listings completed :1170/3150\n", "listings completed :1200/3150\n", "listings completed :1230/3150\n", "listings completed :1260/3150\n", "listings completed :1290/3150\n", "listings completed :1320/3150\n", "listings completed :1350/3150\n", "listings completed :1380/3150\n", "listings completed :1410/3150\n", "listings completed :1440/3150\n", "listings completed :1470/3150\n", "listings completed :1500/3150\n", "listings completed :1530/3150\n", "listings completed :1560/3150\n", "listings completed :1590/3150\n", "listings completed :1620/3150\n", "listings completed :1650/3150\n", "listings completed :1680/3150\n", "listings completed :1710/3150\n", "listings completed :1740/3150\n", "listings completed :1770/3150\n", "listings completed :1800/3150\n", "listings completed :1830/3150\n", "listings completed :1860/3150\n", "listings completed :1890/3150\n", "listings completed :1920/3150\n", "listings completed :1950/3150\n", "listings completed :1980/3150\n", "listings completed :2010/3150\n", "listings completed :2040/3150\n", "listings completed :2070/3150\n", "listings completed :2100/3150\n", "listings completed :2130/3150\n", "listings completed :2160/3150\n", "listings completed :2190/3150\n", "listings completed :2220/3150\n", "listings completed :2250/3150\n", "listings completed :2280/3150\n", "listings completed :2310/3150\n", "listings completed :2340/3150\n", "listings completed :2370/3150\n", "listings completed :2400/3150\n", "listings completed :2430/3150\n", "listings completed :2460/3150\n", "listings completed :2490/3150\n", "listings completed :2520/3150\n", "listings completed :2550/3150\n", "listings completed :2580/3150\n", "listings completed :2610/3150\n", "listings completed :2640/3150\n", "listings completed :2670/3150\n", "listings completed :2700/3150\n", "listings completed :2730/3150\n", "listings completed :2760/3150\n", "listings completed :2790/3150\n", "listings completed :2820/3150\n", "listings completed :2850/3150\n", "listings completed :2880/3150\n", "listings completed :2910/3150\n", "listings completed :2940/3150\n", "listings completed :2970/3150\n", "listings completed :3000/3150\n", "listings completed :3030/3150\n", "listings completed :3060/3150\n", "listings completed :3090/3150\n", "listings completed :3120/3150\n", "listings completed :3150/3150\n", "you are scraping 3 pages and approximately 90 listings for zip: 02136\n", "--- 820.770999908 seconds ---\n", "listings completed :30/90\n", "listings completed :60/90\n", "listings completed :90/90\n", "you are scraping 1 pages and approximately 30 listings for zip: 02137\n", "--- 826.176999807 seconds ---\n", "listings completed :30/30\n", "you are scraping 1 pages and approximately 30 listings for zip: 02138\n", "--- 828.315999985 seconds ---\n", "listings completed :30/30\n", "you are scraping 24 pages and approximately 720 listings for zip: 02139\n", "--- 831.554999828 seconds ---\n", "listings completed :30/720\n", "listings completed :60/720\n", "listings completed :90/720\n", "listings completed :120/720\n", "listings completed :150/720\n", "listings completed :180/720\n", "listings completed :210/720\n", "listings completed :240/720\n", "listings completed :270/720\n", "listings completed :300/720\n", "listings completed :330/720\n", "listings completed :360/720\n", "listings completed :390/720\n", "listings completed :420/720\n", "listings completed :450/720\n", "listings completed :480/720\n", "listings completed :510/720\n", "listings completed :540/720\n", "listings completed :570/720\n", "listings completed :600/720\n", "listings completed :630/720\n", "listings completed :660/720\n", "listings completed :690/720\n", "listings completed :720/720\n", "you are scraping 13 pages and approximately 390 listings for zip: 02140\n", "--- 875.830999851 seconds ---\n", "listings completed :30/390\n", "listings completed :60/390\n", "listings completed :90/390\n", "listings completed :120/390\n", "listings completed :150/390\n", "listings completed :180/390\n", "listings completed :210/390\n", "listings completed :240/390\n", "listings completed :270/390\n", "listings completed :300/390\n", "listings completed :330/390\n", "listings completed :360/390\n", "listings completed :390/390\n", "you are scraping 12 pages and approximately 360 listings for zip: 02141\n", "--- 906.566999912 seconds ---\n", "listings completed :30/360\n", "listings completed :60/360\n", "listings completed :90/360\n", "listings completed :120/360\n", "listings completed :150/360\n", "listings completed :180/360\n", "listings completed :210/360\n", "listings completed :240/360\n", "listings completed :270/360\n", "listings completed :300/360\n", "listings completed :330/360\n", "listings completed :360/360\n", "you are scraping 3 pages and approximately 90 listings for zip: 02142\n", "--- 948.003999949 seconds ---\n", "listings completed :30/90\n", "listings completed :60/90\n", "listings completed :90/90\n" ] } ], "source": [ "BostonZip = ['02101', '02102', '02103', '02104', '02105', '02106', '02107', '02108', '02109', '02110', '02111', '02112', '02113', '02114', '02115', '02116', '02117', '02118', '02119', '02120', '02121', '02122', '02123', '02124', '02125', '02126', '02127', '02128', '02129', '02130', '02131', '02132', '02133', '02134', '02135', '02136', '02137', '02138', '02139', '02140', '02141', '02142', '02143', '02144', '02145', '02148', '02149', '02150', '02151', '02152', '02153', '02154', '02155', '02156', '02163', '02169', '02170', '02171', '02176', '02177', '02180', '02184', '02185', '02186', '02187', '02188', '02189', '02190', '02191']\n", "print len(BostonZip)\n", "scrape_zips(BostonZip)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 1 }