Node.js is a platform for building JavaScript-based applications that has become very popular over the last few years. To best support this rapidly growing community, Oracle developed a new driver for Node.js, which was released in January of this year as a preview release. Written in C and utilizing the Oracle Instant Client libraries, the Node.js driver is both performant and feature-rich.
When it comes to generating JSON, Node.js seems like a natural choice as JSON is based on JavaScript objects. However, it’s not exactly fair to compare a solution crafted in Node.js to the PL/SQL solutions as Node.js has a clear disadvantage: it runs on a separate server. That means we have to bring the data across the network before we can transform it to the desired output. Whatever, let’s compare them anyway! 😀
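To see why Node.js feels natural here: any plain JavaScript object can be serialized with the built-in JSON.stringify. The tiny sketch below is only an illustration of that point and is not part of the actual solutions.

// Minimal illustration: a plain JavaScript object serializes directly to JSON.
var department = {
  id: 10,
  name: 'Administration',
  employees: []
};

console.log(JSON.stringify(department));
// {"id":10,"name":"Administration","employees":[]}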
Solution 1 – Emulating the PL/SQL solutions
This first solution follows the same flow as the PL/SQL based solutions. We start off by making a connection to the database and then begin creating the object we need by fetching data from the departments table. Next we go to the locations table, then the regions table, and on and on until finally we have the object we want. We then use JSON.stringify to serialize the object into the JSON result we are after.
var oracledb = require('oracledb');
var async = require('async');

var dateFormat = 'DD-MON-YYYY';

function getDepartment(departmentId, callback) {
  oracledb.getConnection(
    {
      user: 'hr',
      password: 'welcome',
      connectString: 'server/XE'
    },
    function(err, connection) {
      if (err) throw err;

      connection.execute(
        'select department_id, \n' +
        '   department_name, \n' +
        '   manager_id, \n' +
        '   location_id \n' +
        'from departments \n' +
        'where department_id = :department_id',
        {
          department_id: departmentId
        },
        function(err, results) {
          var department = {};

          if (err) {
            throw err;
          }

          department.id = results.rows[0][0];
          department.name = results.rows[0][1];
          department.managerId = results.rows[0][2];

          getLocationDetails(results.rows[0][3], department, connection, callback);
        }
      );
    }
  );
}

module.exports.getDepartment = getDepartment;

function getLocationDetails(locationId, department, connection, callback) {
  connection.execute(
    'select location_id, \n' +
    '   street_address, \n' +
    '   postal_code, \n' +
    '   country_id \n' +
    'from locations \n' +
    'where location_id = :location_id',
    {
      location_id: locationId
    },
    function(err, results) {
      if (err) throw err;

      department.location = {};
      department.location.id = results.rows[0][0];
      department.location.streetAddress = results.rows[0][1];
      department.location.postalCode = results.rows[0][2];

      getCountryDetails(results.rows[0][3], department, connection, callback);
    }
  );
}

function getCountryDetails(countryId, department, connection, callback) {
  connection.execute(
    'select country_id, \n' +
    '   country_name, \n' +
    '   region_id \n' +
    'from countries \n' +
    'where country_id = :country_id',
    {
      country_id: countryId
    },
    function(err, results) {
      if (err) throw err;

      department.location.country = {};
      department.location.country.id = results.rows[0][0];
      department.location.country.name = results.rows[0][1];
      department.location.country.regionId = results.rows[0][2];

      getManagerDetails(department, connection, callback);
    }
  );
}

function getManagerDetails(department, connection, callback) {
  connection.execute(
    'select employee_id, \n' +
    '   first_name || \' \' || last_name, \n' +
    '   salary, \n' +
    '   job_id \n' +
    'from employees \n' +
    'where employee_id = :manager_id',
    {
      manager_id: department.managerId
    },
    function(err, results) {
      if (err) throw err;

      delete department.managerId;

      department.manager = {};

      if (results.rows.length) {
        department.manager.id = results.rows[0][0];
        department.manager.name = results.rows[0][1];
        department.manager.salary = results.rows[0][2];
        department.manager.jobId = results.rows[0][3];
      }

      getManagerJobDetails(department, connection, callback);
    }
  );
}

function getManagerJobDetails(department, connection, callback) {
  if (department.manager.id) {
    connection.execute(
      'select job_id, \n' +
      '   job_title, \n' +
      '   min_salary, \n' +
      '   max_salary \n' +
      'from jobs \n' +
      'where job_id = :job_id',
      {
        job_id: department.manager.jobId
      },
      function(err, results) {
        if (err) throw err;

        delete department.manager.jobId;

        department.manager.job = {};
        department.manager.job.id = results.rows[0][0];
        department.manager.job.title = results.rows[0][1];
        department.manager.job.minSalary = results.rows[0][2];
        department.manager.job.maxSalary = results.rows[0][3];

        getEmployees(department, connection, callback);
      }
    );
  } else {
    getEmployees(department, connection, callback);
  }
}

function getEmployees(department, connection, callback) {
  connection.execute(
    'select employee_id, \n' +
    '   first_name || \' \' || last_name, \n' +
    '   case when hire_date < to_date(\'01-01-2005\', \'DD-MM-YYYY\') then 1 else 0 end is_senior, ' +
    '   to_char(hire_date, \'' + dateFormat + '\'), \n' +
    '   commission_pct \n' +
    'from employees \n' +
    'where department_id = :department_id',
    {
      department_id: department.id
    },
    function(err, results) {
      if (err) throw err;

      department.employees = [];

      results.rows.forEach(function(row) {
        var emp = {};

        emp.id = row[0];
        emp.name = row[1];
        emp.isSenior = row[2] === 1;
        emp.hireDate = row[3];
        emp.commissionPct = row[4];

        department.employees.push(emp);
      });

      async.eachSeries(
        department.employees,
        function(emp, cb) {
          connection.execute(
            'select job_id, \n' +
            '   department_id, \n' +
            '   to_char(start_date, \'' + dateFormat + '\'), \n' +
            '   to_char(end_date, \'' + dateFormat + '\') \n' +
            'from job_history \n' +
            'where employee_id = :employee_id',
            {
              employee_id: emp.id
            },
            function(err, results) {
              if (err) {
                cb(err);
                return;
              }

              emp.jobHistory = [];

              results.rows.forEach(function(row) {
                var job = {};

                job.id = row[0];
                job.departmentId = row[1];
                job.startDate = row[2];
                job.endDate = row[3];

                emp.jobHistory.push(job);
              });

              cb();
            }
          );
        },
        function(err) {
          if (err) throw err;

          callback(null, JSON.stringify(department));

          connection.release(function(err) {
            if (err) {
              console.error(err);
            }
          });
        }
      );
    }
  );
}
Output
When called with the department id set to 10, the function returns the serialized JSON that matches the goal 100%.
{ "id": 10, "name": "Administration", "location": { "id": 1700, "streetAddress": "2004 Charade Rd", "postalCode": "98199", "country": { "id": "US", "name": "United States of America", "regionId": 2 } }, "manager": { "id": 200, "name": "Jennifer Whalen", "salary": 4400, "job": { "id": "AD_ASST", "title": "Administration Assistant", "minSalary": 3000, "maxSalary": 6000 } }, "employees": [ { "id": 200, "name": "Jennifer Whalen", "isSenior": true, "commissionPct": null, "jobHistory": [ { "id": "AD_ASST", "departmentId": 90, "startDate": "17-SEP-1995", "endDate": "17-JUN-2001" }, { "id": "AC_ACCOUNT", "departmentId": 90, "startDate": "01-JUL-2002", "endDate": "31-DEC-2006" } ] } ] } |
So we got the output we were after, but what about performance? Let’s explore the test results…
Test results
When I finished the solutions for PL/JSON and APEX_JSON, I wanted to see if one was faster than the other. I ran a very simple test: I generated the JSON for all 27 departments in the HR schema 100 times in a loop (2,700 invocations of the solution). APEX_JSON finished in around 3.5 seconds while PL/JSON took 17 seconds. How long did it take the Node.js solution from above to do this? 136 seconds! What??? Node.js must be slow, right? Well, not exactly…
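The test harness itself isn't shown in the post. Purely as an illustration, here's a rough sketch of what such a loop might look like; the department.js module path and the use of async's timesSeries/eachSeries are my assumptions, not something from the original test code.

// Hypothetical test harness (not shown in the original post). Assumes the solution
// above is saved as department.js next to this file and that the HR departments
// have ids 10 through 270 in steps of 10 (27 departments).
var async = require('async');
var department = require(__dirname + '/department');

var departmentIds = [];

for (var d = 10; d <= 270; d += 10) {
  departmentIds.push(d);
}

var start = Date.now();

// 100 passes over all 27 departments = 2,700 invocations of getDepartment.
async.timesSeries(
  100,
  function(n, next) {
    async.eachSeries(
      departmentIds,
      function(id, cb) {
        department.getDepartment(id, function(err, json) {
          cb(err);
        });
      },
      next
    );
  },
  function(err) {
    if (err) throw err;

    console.log('Elapsed:', (Date.now() - start) / 1000, 'seconds');
  }
);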
You may have noticed that, at the top of getDepartment, I used the base driver class (oracledb.getConnection) to get a connection to the database. If I was just doing this once the code would be fine as is, but 2,700 times? That’s not good. In fact, that’s really bad! But what’s a Node.js developer to do?
Using a connection pool
The Node.js driver has supported connection pools from the beginning. The idea is that, rather than incur the cost of making a connection to the database each time we need one, we’ll just grab a connection from a pool of connections that have already been established and release it back to the pool when we’re done with it. Best of all, this is really simple!
Here’s a new module that I’ll use to create, store, and fetch the connection pool:
var oracledb = require('oracledb');

var connectionPool;

//createPool is invoked in a separate test file (not shown)
function createPool(callback) {
  oracledb.createPool(
    {
      user: 'hr',
      password: 'welcome',
      connectString: 'server/XE',
      poolMin: 2,
      poolMax: 20,
      poolIncrement: 2,
      poolTimeout: 120
    },
    function(err, pool) {
      if (err) throw err;

      connectionPool = pool;

      callback();
    }
  );
}

module.exports.createPool = createPool;

function getPool() {
  return connectionPool;
}

module.exports.getPool = getPool;
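The comment in the module above mentions that createPool is invoked in a separate test file that isn't shown. Purely as a sketch, that file only needs to open the pool once before the timed run starts; the runTest function below is a hypothetical stand-in for whatever drives the test loop.

var pool = require(__dirname + '/pool');

// Open the pool once, up front; only start hitting the database after the
// callback fires and the pool is ready.
pool.createPool(function() {
  runTest(); // hypothetical function that runs the timed loop over the departments
});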
With that in place, I can update the test code to open the connection pool prior to starting the test. Then I just need to update the solution to use the connection pool:
var pool = require(__dirname + '/pool');
var async = require('async');

var dateFormat = 'DD-MON-YYYY';

function getDepartment(departmentId, callback) {
  pool.getPool().getConnection(
    function(err, connection) {
      if (err) throw err;

      connection.execute(
        'select department_id, \n' +
        ...
That’s it! I just swapped out the driver module for the pool module and used it to get a connection instead. How much did using the connection pool help? With that one simple change the test completed in 21.5 seconds! Wow, that’s just a little longer than it took PL/JSON. So Node.js is still slow, right? Well, not exactly… 😉
Solution 2 – Optimizing for Node.js
Remember when I said that the solution mimicked the PL/SQL code? There are two major problems with doing this in Node.js:
- Excessive round trips: Each query we’re executing is a round trip to the database. In PL/SQL, this is just a context switch from the PL/SQL engine to the SQL engine and back. I’m not saying we shouldn’t minimize context switches in PL/SQL (we should), but in Node.js each of these is a round trip across the network! By using the database to do some joins, we can reduce the number of queries from 7 to 3.
- Sequential execution: The way the PL/SQL solutions were written, a query was executed, the results were processed, and then the next query was executed. This sequence was repeated until all the work was complete. It would have been nice to do some of this work in parallel. While Oracle does have some options for doing work in parallel, such as Parallel Execution and DBMS_PARALLEL_EXECUTE, PL/SQL is for the most part a single-threaded environment. I could probably have worked some magic to execute the queries and process the results in parallel, perhaps via scheduled jobs or Advanced Queues, but it would have been difficult using PL/SQL.
However, this is an area where Node.js shines! Doing work in parallel is quite easy with the async module. In the following solution, I use async’s parallel method to fire off three functions at the same time: the first builds the basic department object, the second builds the employees array, and the last builds the jobHistory array. The final function, which is fired when all the others have completed, puts the results of the first three functions together into a single object before returning the JSON.
Here’s the solution optimized for Node.js:
var pool = require(__dirname + '/pool');
var async = require('async');

var dateFormat = 'DD-MON-YYYY';

function getDepartment(departmentId, callback) {
  var department = {};
  var employees = [];
  var empMap = {};
  var jobHistory = [];

  async.parallel(
    [
      function (callback) {
        pool.getPool().getConnection(
          function (err, connection) {
            if (err) {
              throw err;
            }

            connection.execute(
              'select dept.department_id, \n' +
              '   dept.department_name, \n' +
              '   loc.location_id, \n' +
              '   loc.street_address, \n' +
              '   loc.postal_code, \n' +
              '   ctry.country_id, \n' +
              '   ctry.country_name, \n' +
              '   ctry.region_id, \n' +
              '   mgr.employee_id, \n' +
              '   mgr.first_name || \' \' || mgr.last_name, \n' +
              '   mgr.salary, \n' +
              '   mgr_job.job_id, \n' +
              '   mgr_job.job_title, \n' +
              '   mgr_job.min_salary, \n' +
              '   mgr_job.max_salary \n' +
              'from departments dept \n' +
              'join locations loc \n' +
              '   on dept.location_id = loc.location_id \n' +
              'join countries ctry \n' +
              '   on loc.country_id = ctry.country_id \n' +
              'left join employees mgr \n' +
              '   on dept.manager_id = mgr.employee_id \n' +
              'left join jobs mgr_job \n' +
              '   on mgr.job_id = mgr_job.job_id \n' +
              'where dept.department_id = :department_id',
              {
                department_id: departmentId
              },
              function (err, results) {
                var deptRow;

                if (err) {
                  callback(err);
                  return;
                }

                deptRow = results.rows[0];

                department.id = deptRow[0];
                department.name = deptRow[1];

                department.location = {};
                department.location.id = deptRow[2];
                department.location.streetAddress = deptRow[3];
                department.location.postalCode = deptRow[4];

                department.location.country = {};
                department.location.country.id = deptRow[5];
                department.location.country.name = deptRow[6];
                department.location.country.regionId = deptRow[7];

                department.manager = {};

                if (deptRow[8]) {
                  department.manager.id = deptRow[8];
                  department.manager.name = deptRow[9];
                  department.manager.salary = deptRow[10];

                  department.manager.job = {};
                  department.manager.job.id = deptRow[11];
                  department.manager.job.title = deptRow[12];
                  department.manager.job.minSalary = deptRow[13];
                  department.manager.job.maxSalary = deptRow[14];
                }

                connection.release(function (err) {
                  if (err) {
                    console.error(err);
                  }

                  callback(null);
                });
              }
            );
          }
        );
      },
      function (callback) {
        pool.getPool().getConnection(
          function (err, connection) {
            if (err) {
              throw err;
            }

            connection.execute(
              'select employee_id, \n' +
              '   first_name || \' \' || last_name, \n' +
              '   case when hire_date < to_date(\'01-01-2005\', \'DD-MM-YYYY\') then 1 else 0 end is_senior, ' +
              '   commission_pct \n' +
              'from employees \n' +
              'where department_id = :department_id',
              {
                department_id: departmentId
              },
              function (err, results) {
                var empRows;

                if (err) {
                  callback(err);
                  return;
                }

                empRows = results.rows;

                empRows.forEach(function (empRow) {
                  var emp = {};

                  emp.id = empRow[0];
                  emp.name = empRow[1];
                  emp.isSenior = empRow[2] === 1; //conversion of 1 or 0 to Boolean
                  emp.commissionPct = empRow[3];
                  emp.jobHistory = [];

                  employees.push(emp);

                  empMap[emp.id] = emp;
                });

                connection.release(function (err) {
                  if (err) {
                    console.error(err);
                  }

                  callback(null);
                });
              }
            );
          }
        );
      },
      function (callback) {
        pool.getPool().getConnection(
          function (err, connection) {
            if (err) {
              throw err;
            }

            connection.execute(
              'select employee_id, \n' +
              '   job_id, \n' +
              '   department_id, \n' +
              '   to_char(start_date, \'' + dateFormat + '\'), \n' +
              '   to_char(end_date, \'' + dateFormat + '\') \n' +
              'from job_history \n' +
              'where employee_id in ( \n' +
              '   select employee_id \n' +
              '   from employees \n' +
              '   where department_id = :department_id \n' +
              ')',
              {
                department_id: departmentId
              },
              function (err, results) {
                var jobRows;

                if (err) {
                  callback(err);
                  return;
                }

                jobRows = results.rows;

                jobRows.forEach(function (jobRow) {
                  var job = {};

                  job.employeeId = jobRow[0];
                  job.id = jobRow[1];
                  job.departmentId = jobRow[2];
                  job.startDate = jobRow[3];
                  job.endDate = jobRow[4];

                  jobHistory.push(job);
                });

                connection.release(function (err) {
                  if (err) {
                    console.error(err);
                  }

                  callback(null);
                });
              }
            );
          }
        );
      }
    ],
    function (err, results) {
      if (err) throw err;

      department.employees = employees;

      jobHistory.forEach(function (job) {
        empMap[job.employeeId].jobHistory.push(job);

        delete job.employeeId;
      });

      callback(null, JSON.stringify(department));
    }
  );
}

module.exports.getDepartment = getDepartment;
How fast did that solution finish the test? 7.8 seconds! Not too shabby! That’s faster than the PL/JSON solution but not quite as fast as APEX_JSON. But could we optimize even further?
Client Result Caching
Because I was running my tests on Oracle XE, I wasn’t able to do a final optimization that would have been possible with the Enterprise Edition of the database: Client Result Caching. With Client Result Caching, Oracle can automatically maintain a cache of the data on the server where Node.js is running. This could have eliminated some round trips and reduced the amount of data moving across the network. I’ll revisit this feature in a future post where we can explore it in detail.
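Just to give a flavor of what that might look like (this is my own hedged sketch, not something from the post): the caching is configured on the database/client side, for example via the CLIENT_RESULT_CACHE_SIZE parameter on an Enterprise Edition database, and individual queries can then opt in with the result_cache hint while the node-oracledb code stays essentially unchanged.

// Hedged sketch: assumes Client Result Caching has been enabled on the database
// (Enterprise Edition, CLIENT_RESULT_CACHE_SIZE > 0). The result_cache hint lets
// repeated executions of this query be served from the cache that the Oracle
// client libraries maintain on the machine where Node.js runs.
var pool = require(__dirname + '/pool'); // pool module from earlier in the post

function getDepartmentName(departmentId, callback) {
  pool.getPool().getConnection(function (err, connection) {
    if (err) return callback(err);

    connection.execute(
      'select /*+ result_cache */ department_name \n' +
      'from departments \n' +
      'where department_id = :department_id',
      { department_id: departmentId },
      function (err, results) {
        connection.release(function (err) {
          if (err) console.error(err);
        });

        if (err) return callback(err);

        callback(null, results.rows[0][0]);
      }
    );
  });
}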
I’m not convinced. This is kinda odd for Oracle.
Hi Jokester,
What’s odd about it? Data has been stored relationally and converted to other formats (CSV, XML, etc.) for a very long time. JSON is just the latest incarnation. Think it will be the last?
Also, if you prefer, you can use Oracle as a JSON document store (or a hybrid approach), and Oracle 12c has lots of goodies for working with JSON data. I’ll be covering a lot of this in upcoming posts.
Thank you for this very useful info! I’m developing a web mapping application for a client working with an Oracle DB and your article really helped, thanks again for taking the time to share this.
Hi Melegy,
Thanks for the feedback, I’m glad it helped!
Regards,
Dan
When using the pool, are you using the same connection for all async calls? Shouldn’t you call getConnection from the pool for each call? That way each async call has its own connection.
Hi Gus,
See the section titled “Using a connection pool”. 🙂
Regards,
Dan