{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from xml.etree import ElementTree\n",
    "import pandas as pd\n",
    "import time\n",
    "import numpy as np\n",
    "import json\n",
    "import geocoder"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### Get data from Zillow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n"
     ]
    }
   ],
   "source": [
    "# Search-results URL template; %d is replaced by the 1-based page number.\n",
    "ZILLOW_PAGE_URL = \"http://www.zillow.com/homes/for_sale/Boston-MA/44269_rid/any_days/42.4379,-70.629044,42.191135,-71.310883_rect/10_zm/%d_p/\"\n",
    "\n",
    "\n",
    "def GetDataFromWeb(num, expected_per_page=26, delay=2, timeout=30):\n",
    "    \"\"\"Collect listing caption tags from search-result pages 1 .. num-1.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    num : int\n",
    "        Exclusive upper bound on the page number (pages 1 .. num-1 are fetched).\n",
    "    expected_per_page : int\n",
    "        Caption count a page must have to be accepted. Pages with any other\n",
    "        count are reported as '<page> <count>' and skipped.\n",
    "    delay : float\n",
    "        Seconds to sleep after every request.\n",
    "    timeout : float\n",
    "        Seconds passed to requests.get as its timeout.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        The 'zsg-photo-card-caption' div tags of every accepted page.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    requests.exceptions.RequestException\n",
    "        Propagated unchanged if a request fails or times out.\n",
    "    \"\"\"\n",
    "    listingsFromWeb = []\n",
    "    for i in range(1, num):\n",
    "        page = requests.get(ZILLOW_PAGE_URL % i, timeout=timeout).text\n",
    "        page_soup = BeautifulSoup(page, 'html.parser')\n",
    "        # find() returns None when the page has no element with id\n",
    "        # 'search-results'; treat that as a page with zero listings\n",
    "        # instead of failing on None.find_all.\n",
    "        results = page_soup.find('div', {'id': 'search-results'})\n",
    "        if results is None:\n",
    "            item = []\n",
    "        else:\n",
    "            item = results.find_all('div', {'class': 'zsg-photo-card-caption'})\n",
    "        if len(item) == expected_per_page:\n",
    "            listingsFromWeb.extend(item)\n",
    "        else:\n",
    "            # Same text as the Python 2 statement `print i, len(item)`.\n",
    "            print('%d %d' % (i, len(item)))\n",
    "        time.sleep(delay)\n",
    "    # Report the last page requested; skipped when no page was requested,\n",
    "    # because the loop variable would be unbound.\n",
    "    if num > 1:\n",
    "        print(num - 1)\n",
    "    return listingsFromWeb\n",
    "\n",
    "\n",
    "listingsFromWeb = GetDataFromWeb(28)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26\n",
      "4\n",
      "
$899,0005 bds · 2 ba · 2,423 sqft
8 days on Zillow·54 Grampian Way, Dorchester, MA
Gibson Sotheby's International Realty
$529,0003 bds · 2 ba · 1,260 sqft
$20,000 (Nov 1)·7-9 Herbertson # 2, Jamaica Plain, MA
$849,0004 bds · 3 ba · 2,400 sqft
Open: Fri. 6-7pm·321 Dorchester St # 1, Boston, MA