switched jsdom for cheerio. Epic speed improvements!
This commit is contained in:
parent
ce9317034d
commit
c1b9977bf8
3 changed files with 111 additions and 128 deletions
|
@ -9,12 +9,12 @@
|
|||
"dependencies": {
|
||||
"express": "3.1.0",
|
||||
"jade": "*",
|
||||
"jsdom": "0.3.4",
|
||||
"request": "2.12.0",
|
||||
"underscore": "*"
|
||||
"underscore": "*",
|
||||
"cheerio": "~0.10.8"
|
||||
},
|
||||
"engines": {
|
||||
"node": "0.8.x",
|
||||
"npm": "1.1.x"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
exports.index = function(req, res){
|
||||
|
||||
var _ = require('underscore'),
|
||||
jsdom = require('jsdom'),
|
||||
cheerio = require('cheerio'),
|
||||
request = require('request');
|
||||
|
||||
|
||||
|
@ -28,18 +28,11 @@ exports.index = function(req, res){
|
|||
// TODO: render error page here...
|
||||
}
|
||||
|
||||
jsdom.env(
|
||||
{
|
||||
html: body,
|
||||
scripts: ['http://code.jquery.com/jquery.min.js']
|
||||
},
|
||||
parsePage
|
||||
);
|
||||
parsePage(body);
|
||||
}
|
||||
|
||||
function parsePage(error, window) {
|
||||
var $ = window.jQuery,
|
||||
$body = $(window.document.body),
|
||||
function parsePage(body) {
|
||||
var $ = cheerio.load(body),
|
||||
$vehicleLinks = $('#vehicles').children('a');
|
||||
|
||||
$vehicleLinks.each(function(i,link) {
|
||||
|
|
218
routes/specs.js
218
routes/specs.js
|
@ -6,7 +6,7 @@
|
|||
exports.fetch = function(req, res){
|
||||
|
||||
var _ = require('underscore'),
|
||||
jsdom = require('jsdom'),
|
||||
cheerio = require('cheerio'),
|
||||
request = require('request');
|
||||
|
||||
var model = req.params.model,
|
||||
|
@ -33,122 +33,112 @@ exports.fetch = function(req, res){
|
|||
var tickUrl = 'http://www.mazda.com.au/brochures/base-framework/img/specs/specs_tick.gif',
|
||||
falseString = '-';
|
||||
|
||||
jsdom.env(
|
||||
var $ = cheerio.load(body),
|
||||
$styles = $('.spec-body');
|
||||
|
||||
// For each Body Style (Sedan, Wagon, etc.)
|
||||
$styles.each(_parseBodyStyleHtml);
|
||||
|
||||
// console.log(specs);
|
||||
// console.log('FIRST BODY STYLE CATEGORIES YO: ', _.pluck(specs[0].categories, 'name'));
|
||||
|
||||
// res.send('There are ' + $styles.length + ' body style(s) for the ' + model);
|
||||
var carName = $('h1').text().replace(' Specifications','');
|
||||
res.render(
|
||||
'specs',
|
||||
{
|
||||
html: body,
|
||||
scripts: ['http://code.jquery.com/jquery.min.js']
|
||||
},
|
||||
function (error, window) {
|
||||
var $ = window.jQuery,
|
||||
$body = $(window.document.body),
|
||||
$styles = $('.spec-body');
|
||||
|
||||
// For each Body Style (Sedan, Wagon, etc.)
|
||||
$styles.each(_parseBodyStyleHtml);
|
||||
|
||||
// console.log(specs);
|
||||
// console.log('FIRST BODY STYLE CATEGORIES YO: ', _.pluck(specs[0].categories, 'name'));
|
||||
|
||||
// res.send('There are ' + $styles.length + ' body style(s) for the ' + model);
|
||||
var carName = $body.find('h1').text().replace(' Specifications','');
|
||||
res.render(
|
||||
'specs',
|
||||
{
|
||||
title: carName,
|
||||
styles: specs,
|
||||
modifier: modifier,
|
||||
pretty: !isMinified
|
||||
}
|
||||
);
|
||||
|
||||
function _parseBodyStyleHtml(i,bodyStyle) {
|
||||
var style = {
|
||||
name: '',
|
||||
image: '',
|
||||
slug: '',
|
||||
grades: [],
|
||||
categories: []
|
||||
};
|
||||
|
||||
var $bodyStyle = $(bodyStyle),
|
||||
$cats = $bodyStyle.find('.spec-cat');
|
||||
|
||||
// Create array of grade names (e.g. 'Sport', 'Touring', etc.)
|
||||
$cats.first().find('table thead th').each(function (i,grade) {
|
||||
// the first <th> is not a grade header
|
||||
if (i === 0) {
|
||||
return;
|
||||
}
|
||||
style.grades.push( $.trim($(grade).text()) );
|
||||
});
|
||||
|
||||
// Assign Body Style name (e.g. 'Sedan')
|
||||
style.name = $.trim( $bodyStyle.find('h3').text() );
|
||||
style.image = 'http://www.mazda.com.au' + $('#specs-body').find('li').eq(i).find('img').attr('src');
|
||||
style.slug = slugify(style.name);
|
||||
|
||||
// Collect specs for each category (e.g. 'Powertrain', 'Chassis', etc.)
|
||||
$cats.each(function (i,cat) {
|
||||
var $cat = $(cat),
|
||||
$rows = $cat.find('tbody tr');
|
||||
|
||||
var category = {};
|
||||
// category.name = $.trim( $cat.children('h4').text() );
|
||||
category.name = $.trim( $('.specs-tabs').first().find('li').eq(i).text() );
|
||||
category.specs = [];
|
||||
category.slug = slugify(category.name);
|
||||
|
||||
// collect each row's cell data
|
||||
$rows.each(function (i,rowHtml) {
|
||||
var $row = $(rowHtml),
|
||||
row = [];
|
||||
|
||||
$row.find('td').each(function (i,cell) {
|
||||
var $cell = $(cell);
|
||||
|
||||
// strip out any <sup> elems
|
||||
$cell.find('sup').remove();
|
||||
|
||||
var text = $.trim( $(cell).text() ),
|
||||
hasTick = $cell.html().search('/images/specs/tick.gif') !== -1,
|
||||
hasDash = text === '-';
|
||||
|
||||
if (hasTick) {
|
||||
// assign true boolean as cell value if cell has a tick image
|
||||
row.push(tickUrl);
|
||||
}
|
||||
else if (hasDash) {
|
||||
row.push(falseString);
|
||||
}
|
||||
else {
|
||||
row.push(text);
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
category.specs.push(row);
|
||||
});
|
||||
|
||||
style.categories.push(category);
|
||||
});
|
||||
|
||||
// Append Body Style object to main specs array
|
||||
specs.push( style );
|
||||
}
|
||||
|
||||
/**
|
||||
* Transform text into a URL slug: spaces turned into dashes, remove non alnum
|
||||
* @param string text
|
||||
*/
|
||||
function slugify(text) {
|
||||
text = text.replace(/[^-a-zA-Z0-9,&\s]+/ig, '');
|
||||
text = text.replace(/-/gi, "_");
|
||||
text = text.replace(/\s/gi, "-");
|
||||
text = text.toLowerCase();
|
||||
return text;
|
||||
}
|
||||
title: carName,
|
||||
styles: specs,
|
||||
modifier: modifier,
|
||||
pretty: !isMinified
|
||||
}
|
||||
);
|
||||
|
||||
function _parseBodyStyleHtml(i,bodyStyle) {
|
||||
var style = {
|
||||
name: '',
|
||||
image: '',
|
||||
slug: '',
|
||||
grades: [],
|
||||
categories: []
|
||||
};
|
||||
|
||||
var $bodyStyle = $(bodyStyle),
|
||||
$cats = $bodyStyle.find('.spec-cat');
|
||||
|
||||
// Create array of grade names (e.g. 'Sport', 'Touring', etc.)
|
||||
$cats.first().find('table thead th').each(function (i,grade) {
|
||||
// the first <th> is not a grade header
|
||||
if (i === 0) {
|
||||
return;
|
||||
}
|
||||
style.grades.push( $(grade).text() );
|
||||
});
|
||||
|
||||
// Assign Body Style name (e.g. 'Sedan')
|
||||
style.name = $bodyStyle.find('h3').text();
|
||||
style.image = 'http://www.mazda.com.au' + $('#specs-body').find('li').eq(i).find('img').attr('src');
|
||||
style.slug = slugify(style.name);
|
||||
|
||||
// Collect specs for each category (e.g. 'Powertrain', 'Chassis', etc.)
|
||||
$cats.each(function (i,cat) {
|
||||
var $cat = $(cat),
|
||||
$rows = $cat.find('tbody tr');
|
||||
|
||||
var category = {};
|
||||
category.name = $('.specs-tabs').first().find('li').eq(i).text();
|
||||
category.specs = [];
|
||||
category.slug = slugify(category.name);
|
||||
|
||||
// collect each row's cell data
|
||||
$rows.each(function (i,rowHtml) {
|
||||
var $row = $(rowHtml),
|
||||
row = [];
|
||||
|
||||
$row.find('td').each(function (i,cell) {
|
||||
var $cell = $(cell);
|
||||
|
||||
// strip out any <sup> elems
|
||||
$cell.find('sup').remove();
|
||||
|
||||
var text = $(cell).text(),
|
||||
hasTick = $cell.html().search('/images/specs/tick.gif') !== -1,
|
||||
hasDash = text === '-';
|
||||
|
||||
if (hasTick) {
|
||||
// assign true boolean as cell value if cell has a tick image
|
||||
row.push(tickUrl);
|
||||
}
|
||||
else if (hasDash) {
|
||||
row.push(falseString);
|
||||
}
|
||||
else {
|
||||
row.push(text);
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
category.specs.push(row);
|
||||
});
|
||||
|
||||
style.categories.push(category);
|
||||
});
|
||||
|
||||
// Append Body Style object to main specs array
|
||||
specs.push( style );
|
||||
}
|
||||
|
||||
/**
|
||||
* Transform text into a URL slug: spaces turned into dashes, remove non alnum
|
||||
* @param string text
|
||||
*/
|
||||
function slugify(text) {
|
||||
text = text.replace(/[^-a-zA-Z0-9,&\s]+/ig, '');
|
||||
text = text.replace(/-/gi, "_");
|
||||
text = text.replace(/\s/gi, "-");
|
||||
text = text.toLowerCase();
|
||||
return text;
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
|
Loading…
Reference in a new issue