switched jsdom for cheerio. Epic speed improvements!

This commit is contained in:
Victor Nguyen 2013-03-18 22:37:29 +11:00
parent ce9317034d
commit c1b9977bf8
3 changed files with 111 additions and 128 deletions

View file

@ -9,12 +9,12 @@
"dependencies": {
"express": "3.1.0",
"jade": "*",
"jsdom": "0.3.4",
"request": "2.12.0",
"underscore": "*"
"underscore": "*",
"cheerio": "~0.10.8"
},
"engines": {
"node": "0.8.x",
"npm": "1.1.x"
}
}
}

View file

@ -6,7 +6,7 @@
exports.index = function(req, res){
var _ = require('underscore'),
jsdom = require('jsdom'),
cheerio = require('cheerio'),
request = require('request');
@ -28,18 +28,11 @@ exports.index = function(req, res){
// TODO: render error page here...
}
jsdom.env(
{
html: body,
scripts: ['http://code.jquery.com/jquery.min.js']
},
parsePage
);
parsePage(body);
}
function parsePage(error, window) {
var $ = window.jQuery,
$body = $(window.document.body),
function parsePage(body) {
var $ = cheerio.load(body),
$vehicleLinks = $('#vehicles').children('a');
$vehicleLinks.each(function(i,link) {

View file

@ -6,7 +6,7 @@
exports.fetch = function(req, res){
var _ = require('underscore'),
jsdom = require('jsdom'),
cheerio = require('cheerio'),
request = require('request');
var model = req.params.model,
@ -33,122 +33,112 @@ exports.fetch = function(req, res){
var tickUrl = 'http://www.mazda.com.au/brochures/base-framework/img/specs/specs_tick.gif',
falseString = '-';
jsdom.env(
var $ = cheerio.load(body),
$styles = $('.spec-body');
// For each Body Style (Sedan, Wagon, etc.)
$styles.each(_parseBodyStyleHtml);
// console.log(specs);
// console.log('FIRST BODY STYLE CATEGORIES YO: ', _.pluck(specs[0].categories, 'name'));
// res.send('There are ' + $styles.length + ' body style(s) for the ' + model);
var carName = $('h1').text().replace(' Specifications','');
res.render(
'specs',
{
html: body,
scripts: ['http://code.jquery.com/jquery.min.js']
},
function (error, window) {
var $ = window.jQuery,
$body = $(window.document.body),
$styles = $('.spec-body');
// For each Body Style (Sedan, Wagon, etc.)
$styles.each(_parseBodyStyleHtml);
// console.log(specs);
// console.log('FIRST BODY STYLE CATEGORIES YO: ', _.pluck(specs[0].categories, 'name'));
// res.send('There are ' + $styles.length + ' body style(s) for the ' + model);
var carName = $body.find('h1').text().replace(' Specifications','');
res.render(
'specs',
{
title: carName,
styles: specs,
modifier: modifier,
pretty: !isMinified
}
);
/**
 * Build a "body style" record from one `.spec-body` element and append it
 * to the `specs` array.
 *
 * Passed as the callback to `$styles.each(...)`, so it runs once per matched
 * element. Reads `$`, `specs`, `slugify`, `tickUrl` and `falseString` from the
 * enclosing scope (`specs` is declared outside the lines visible here).
 *
 * @param i          index of this element within the `.spec-body` selection;
 *                   also used to pick the matching `<li>` under `#specs-body`
 * @param bodyStyle  the element handed over by `.each()`
 */
function _parseBodyStyleHtml(i,bodyStyle) {
// Record that is filled in below and pushed onto `specs` at the end.
var style = {
name: '',
image: '',
slug: '',
grades: [],
categories: []
};
var $bodyStyle = $(bodyStyle),
$cats = $bodyStyle.find('.spec-cat');
// Create array of grade names (e.g. 'Sport', 'Touring', etc.)
// Only the first category's table header is read for the grade names.
$cats.first().find('table thead th').each(function (i,grade) {
// the first <th> is not a grade header
if (i === 0) {
return;
}
style.grades.push( $.trim($(grade).text()) );
});
// Assign Body Style name (e.g. 'Sedan')
style.name = $.trim( $bodyStyle.find('h3').text() );
// The image comes from the i-th <li> under #specs-body; its src is appended
// to the site origin as-is.
style.image = 'http://www.mazda.com.au' + $('#specs-body').find('li').eq(i).find('img').attr('src');
style.slug = slugify(style.name);
// Collect specs for each category (e.g. 'Powertrain', 'Chassis', etc.)
$cats.each(function (i,cat) {
var $cat = $(cat),
$rows = $cat.find('tbody tr');
var category = {};
// category.name = $.trim( $cat.children('h4').text() );
// The category name is taken from the i-th tab label in the first
// `.specs-tabs` list rather than from the category element itself.
category.name = $.trim( $('.specs-tabs').first().find('li').eq(i).text() );
category.specs = [];
category.slug = slugify(category.name);
// collect each row's cell data
$rows.each(function (i,rowHtml) {
var $row = $(rowHtml),
row = [];
$row.find('td').each(function (i,cell) {
var $cell = $(cell);
// strip out any <sup> elems
$cell.find('sup').remove();
// NOTE(review): String#search treats its string argument as a regular
// expression, so the '.' in the path below matches any character.
var text = $.trim( $(cell).text() ),
hasTick = $cell.html().search('/images/specs/tick.gif') !== -1,
hasDash = text === '-';
if (hasTick) {
// a cell containing the tick image is stored as `tickUrl`
row.push(tickUrl);
}
else if (hasDash) {
// a lone dash is stored as `falseString`
row.push(falseString);
}
else {
// anything else is stored as the trimmed cell text
row.push(text);
}
});
category.specs.push(row);
});
style.categories.push(category);
});
// Append Body Style object to main specs array
specs.push( style );
}
/**
 * Transform text into a URL slug.
 *
 * Applied in this order:
 *  1. remove every character that is not a letter, digit, hyphen, comma,
 *     ampersand or whitespace (commas and ampersands are kept);
 *  2. replace each hyphen with an underscore;
 *  3. replace each whitespace character with a hyphen (runs are not
 *     collapsed and the text is not trimmed);
 *  4. lower-case the result.
 *
 * @param string text
 * @return string the slug
 */
function slugify(text) {
text = text.replace(/[^-a-zA-Z0-9,&\s]+/ig, '');
text = text.replace(/-/gi, "_");
text = text.replace(/\s/gi, "-");
text = text.toLowerCase();
return text;
}
title: carName,
styles: specs,
modifier: modifier,
pretty: !isMinified
}
);
/**
 * Build a "body style" record from one `.spec-body` element and append it
 * to the `specs` array.
 *
 * Passed as the callback to `$styles.each(...)`, so it runs once per matched
 * element. Reads `$` (the cheerio document), `specs`, `slugify`, `tickUrl`
 * and `falseString` from the enclosing scope.
 *
 * All text pulled out of the markup is trimmed with the native
 * `String.prototype.trim`. Without trimming, padded cells such as " - " are
 * not recognised as dashes and padded names yield slugs with stray dashes.
 *
 * @param {number} i          index of this element within the `.spec-body`
 *                            selection; also selects the matching `<li>`
 *                            under `#specs-body` for the image
 * @param {Object} bodyStyle  the element handed over by `.each()`
 */
function _parseBodyStyleHtml(i,bodyStyle) {
  var style = {
    name: '',
    image: '',
    slug: '',
    grades: [],
    categories: []
  };

  var $bodyStyle = $(bodyStyle),
      $cats = $bodyStyle.find('.spec-cat');

  // Create array of grade names (e.g. 'Sport', 'Touring', etc.)
  // Only the first category's table header is read for the grade names.
  $cats.first().find('table thead th').each(function (thIndex, grade) {
    // the first <th> is not a grade header
    if (thIndex === 0) {
      return;
    }
    style.grades.push( $(grade).text().trim() );
  });

  // Assign Body Style name (e.g. 'Sedan')
  style.name = $bodyStyle.find('h3').text().trim();
  style.image = 'http://www.mazda.com.au' + $('#specs-body').find('li').eq(i).find('img').attr('src');
  style.slug = slugify(style.name);

  // Collect specs for each category (e.g. 'Powertrain', 'Chassis', etc.)
  $cats.each(function (catIndex, cat) {
    var $rows = $(cat).find('tbody tr');

    var category = {};
    // The category name comes from the matching tab label in the first
    // `.specs-tabs` list, not from the category element itself.
    category.name = $('.specs-tabs').first().find('li').eq(catIndex).text().trim();
    category.specs = [];
    category.slug = slugify(category.name);

    // collect each row's cell data
    $rows.each(function (rowIndex, rowHtml) {
      var row = [];

      $(rowHtml).find('td').each(function (cellIndex, cell) {
        var $cell = $(cell);

        // strip out any <sup> elems
        $cell.find('sup').remove();

        var text = $cell.text().trim(),
            // literal substring test; `.html()` may yield null, so fall back to ''
            hasTick = ($cell.html() || '').indexOf('/images/specs/tick.gif') !== -1,
            hasDash = text === '-';

        if (hasTick) {
          // a cell containing the tick image is stored as `tickUrl`
          row.push(tickUrl);
        }
        else if (hasDash) {
          // a lone dash is stored as `falseString`
          row.push(falseString);
        }
        else {
          row.push(text);
        }
      });

      category.specs.push(row);
    });

    style.categories.push(category);
  });

  // Append Body Style object to main specs array
  specs.push( style );
}
/**
 * Derive a slug from display text.
 *
 * The text passes through four steps, in order: strip every character that
 * is not a letter, digit, hyphen, comma, ampersand or whitespace; turn each
 * hyphen into an underscore; turn each whitespace character into a hyphen;
 * lower-case what is left. Whitespace runs are not collapsed and the input
 * is not trimmed.
 *
 * @param string text
 * @return string the slug
 */
function slugify(text) {
  var stripped = text.replace(/[^-a-zA-Z0-9,&\s]+/ig, '');

  return stripped
    .replace(/-/gi, "_")
    .replace(/\s/gi, "-")
    .toLowerCase();
}
}
);