hybrid security analysis of web JavaScript code via dynamic partial evaluation Omer Tripp Pietro Ferrara Marco Pistoia ! IBM Research, NY Work published at the ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA 2014) Recipient of the ACM SIGSOFT Distinguished Paper Award 1 web client-side code* 38% 30% 23% 15% 25% 30% 8% 0% 5% 9 years ago 4 years ago * data due to IBM application security research team 2 today client-side vulnerabilities* DOM-based XSS var pos = document.location.href.indexOf("name="); document.write(document.URL.substring(pos, document.URL.length)); open redirect var pos = document.location.href.indexOf("target="); var val = document.location.href.substring(pos); document.location.href = "http://" + val; >15% vulnerable to these attacks! * data due to IBM application security research team 3 reflected XSS web app! script reflected into HTML response without proper encoding attacker’s evil script executed using victim’s credentials link embedded with evil script attacker! 4! victim! DOM-based XSS ibm.com! NO reflection into HTML response evil script NOT sent to server www.ibm.com/index.html? name=<script>…</script> attacker! 5 victim! client-side vulnerabilities* DOM-based XSS var pos = document.location.href.indexOf("name="); document.write(document.URL.substring(pos, document.URL.length)); open redirect var pos = document.location.href.indexOf("target="); var val = document.location.href.substring(pos); document.location.href = "http://" + val; >15% vulnerable to these attacks! * data due to IBM application security research team 6 open redirect ibm.com! attacker.com! www.ibm.com/index.html?target=attacker.com attacker! 7 victim! 
client-side vulnerabilities* DOM-based XSS var pos = document.location.href.indexOf("name="); document.write(document.URL.substring(pos, document.URL.length)); open redirect var pos = document.location.href.indexOf("target="); var val = document.location.href.substring(pos); document.location.href = "http://" + val; >15% vulnerable to these attacks! * data due to IBM application security research team 8 JavaScript complexities 9! JavaScript complexities eval and its relatives! eval("document.write('evil')"); 10! JavaScript complexities reflective property access! var a = "foo" + "bar"; var b = obj[a]; 11! JavaScript complexities arguments array! bar() { if (arguments.length > 3) foo(arguments[2]); } bar(1, ”x”, 3) 12! JavaScript complexities prototype-chain property lookup! function F() { this.f = document.location; } function G() { } G.prototype = new F(); var g = new G(); write(g.bar); 13! JavaScript complexities function pointers! var m = function() { … } var k = function(f) { f(); } k(m); 14! JavaScript complexities lexical scoping! function foo() { var y = 42; var bar = function() { write(y); } } 15! JavaScript complexities …! 16! motivating example var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } (real-world JavaScript code from the Alcatel-Lucent website)! 17 motivating example var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } (real-world JavaScript code from the Alcatel-Lucent website)! 
18 taint analysis var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } (real-world JavaScript code from the Alcatel-Lucent website)! 19 taint analysis var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } (real-world JavaScript code from the Alcatel-Lucent website)! 20 taint analysis var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } (real-world JavaScript code from the Alcatel-Lucent website)! 21 taint analysis var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } (real-world JavaScript code from the Alcatel-Lucent website)! 22 taint analysis var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } (real-world JavaScript code from the Alcatel-Lucent website)! 
23 taint analysis var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } BOOM?! (real-world JavaScript code from the Alcatel-Lucent website)! 24 dynamic partial evaluation var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check); result = result + ‘login.jsp’ + str.substring(url_check + search_term.length), str.length); document.url = result; // sink } (real-world JavaScript code from the Alcatel-Lucent website)! 25 dynamic partial evaluation var search_term = ‘login.html’; var str = document.location → “http://x.com/login.html?p1=v1”; var url_check = str.indexOf(search_term); if (url_check > -1) { var result = str.substring(0, url_check) → “http://x.com/”; result = result + ‘login.jsp’ + str.substring(url_check + search_term.length),str.length) → “http://x.com/login.jsp?p1=v1”; document.url = result; } (real-world JavaScript code from the Alcatel-Lucent website)! 26 dynamic partial evaluation var search_term = ‘login.html’; var str = “http://x.com/login.html?p1=v1”; var url_check = str.indexOf(search_term); if (url_check > -1) { var result = “http://x.com/”; result = “http://x.com/login.jsp?p1=v1”; document.url = result; } (real-world JavaScript code from the Alcatel-Lucent website)! 27 our hybrid approach Dynamic Oracle! • Crawls Web site! • Collects dynamic information! • Links references to the DOM with partially concretized values! GET http://x.com/login.html?p1=v1! var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); … var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); … http://x.com/login.html?p1=v1! 
var search_term = ‘login.html’; var str = “http://x.com/login.html?p1=v1”; var url_check = str.indexOf(search_term); … Traditional Static Taint Analysis! • Looks for flows from sources to sinks! 28 Static String Analysis! • Determines which parts of a string are beyond user control! • Leverages string information for better classification of findings! our hybrid approach Dynamic Oracle! • Crawls Web site! • Collects dynamic information! • Links references to the DOM with partially concretized values! GET http://x.com/login.html?p1=v1! var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); … var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); … http://x.com/login.html?p1=v1! accuracy! var search_term = ‘login.html’; var str = “http://x.com/login.html?p1=v1”; var url_check = str.indexOf(search_term); … Traditional Static Taint Analysis! • Looks for flows from sources to sinks! 29 Static String Analysis! • Determines which parts of a string are beyond user control! • Leverages string information for better classification of findings! our hybrid approach Dynamic Oracle! • Crawls Web site! • Collects dynamic information! • Links references to the DOM with partially concretized values! GET http://x.com/login.html?p1=v1! var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); … var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); … http://x.com/login.html?p1=v1! coverage! var search_term = ‘login.html’; var str = “http://x.com/login.html?p1=v1”; var url_check = str.indexOf(search_term); … Traditional Static Taint Analysis! • Looks for flows from sources to sinks! 30 Static String Analysis! • Determines which parts of a string are beyond user control! • Leverages string information for better classification of findings! our hybrid approach Dynamic Oracle! 
• Crawls Web site! • Collects dynamic information! • Links references to the DOM with partially concretized values! GET http://x.com/login.html?p1=v1! var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); … var search_term = ‘login.html’; var str = document.url; // source var url_check = str.indexOf(search_term); … http://x.com/login.html?p1=v1! lightweight! var search_term = ‘login.html’; var str = “http://x.com/login.html?p1=v1”; var url_check = str.indexOf(search_term); … Traditional Static Taint Analysis! • Looks for flows from sources to sinks! 31 Static String Analysis! • Determines which parts of a string are beyond user control! • Leverages string information for better classification of findings! static analysis: JSA — intuition http://x.com/login.html?p1=v1 document.location system-controlled prefix http://x.com/login.html? 32 attacker-controlled suffix .* static analysis: JSA — domain var str = document.location; var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { var tmp = str.substring(0,n); document.location.href = tmp; } 33 static analysis: JSA — domain var str = “HTTP://X.Com/login.html?p1=v1”; var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { var tmp = str.substring(0,n); document.location.href = tmp; } 34 static analysis: JSA — domain Prx var str = “HTTP://X.Com/login.html?p1=v1”; var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { var tmp = str.substring(0,n); document.location.href = tmp; } 35 {str},HTTP://X.Com/login.html?,T Idx static analysis: JSA — domain Prx var str = “HTTP://X.Com/login.html?p1=v1”; {str},HTTP://X.Com/login.html?,T var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { var tmp = str.substring(0,n); document.location.href = tmp; } 36 {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T Idx static analysis: JSA — domain Prx var str = 
“HTTP://X.Com/login.html?p1=v1”; Idx {str},HTTP://X.Com/login.html?,T var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T var tmp = str.substring(0,n); document.location.href = tmp; } 37 {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T {n: 13} static analysis: JSA — domain Prx var str = “HTTP://X.Com/login.html?p1=v1”; Idx {str},HTTP://X.Com/login.html?,T var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T var tmp = str.substring(0,n); document.location.href = tmp; } 38 {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T {n: 13} {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T ………………………………………………………………………………………………… {tmp},HTTP://X.Com/,F {n: 13} ………………… static analysis: JSA — domain Prx var str = “HTTP://X.Com/login.html?p1=v1”; Idx {str},HTTP://X.Com/login.html?,T var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T var tmp = str.substring(0,n); document.location.href = tmp; } 39 {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T {n: 13} {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T ………………………………………………………………………………………………… {tmp},HTTP://X.Com/,F {n: 13} ………………… static analysis: JSA — transformers Prx var str = “HTTP://X.Com/login.html?p1=v1”; {str},HTTP://X.Com/login.html?,T var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { var tmp = str.substring(0,n); document.location.href = tmp; } 40 {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T Idx static analysis: JSA — transformers Prx var str = “HTTP://X.Com/login.html?p1=v1”; Idx {str},HTTP://X.Com/login.html?,T var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { 
{str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T var tmp = str.substring(0,n); document.location.href = tmp; } 41 {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T {n: 13} static analysis: JSA — transformers Prx var str = “HTTP://X.Com/login.html?p1=v1”; Idx {str},HTTP://X.Com/login.html?,T var lstr = str.toLowerCase(); var n = lstr.indexOf(“login.html”); if (n > -1) { {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T var tmp = str.substring(0,n); document.location.href = tmp; } 42 {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T {n: 13} {str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T ………………………………………………………………………………………………… {tmp},HTTP://X.Com/,F {n: 13} ………………… note on implementation JSA written atop the WALA framework optimization: staged analysis 43 IFDS problem / distributive analysis 1st stage: taint analysis 2nd stage: JSA applied to source/sink pairs not eliminated by taint analysis featured in IBM Security AppScan Standard Edition (AppScan Std) V8.6 evaluation — benchmarks 44 170,000 webpages / 675 websites: Fortune 500 top 100 (www.web100.com) handpicked security and IT websites up to 500 pages per site via nonintrusive crawling no login only link crawling evaluation — benchmarks var pageUrl = window.location; var cId = document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value; var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl; openPopupWindow(url); (from the Corning website)! var url = window.location.href; var i = url.indexOf("?"); if (i>0) {url=url.substring(0, i); i = url.indexOf(“/ntopic/");} if (i<0) { return; } url = url.substring(0, i+1); url = url+"livehelp/?pluginID="+a; window.location.href = url; (from the IBM Team Concert website)! 
45 evaluation — benchmarks var pageUrl = window.location; var cId = document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value; var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl; openPopupWindow(url); (from the Corning website)! [callout: both cId and pageUrl flow into the target URL’s query string, after the ‘?’] var url = window.location.href; var i = url.indexOf("?"); if (i>0) {url=url.substring(0, i); i = url.indexOf("/ntopic/");} if (i<0) { return; } url = url.substring(0, i+1); url = url+"livehelp/?pluginID="+a; window.location.href = url; (from the IBM Team Concert website)! 46 evaluation — benchmarks var pageUrl = window.location; var cId = document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value; var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl; openPopupWindow(url); (from the Corning website)! [callout: both cId and pageUrl flow into the target URL’s query string, after the ‘?’] var url = window.location.href; var i = url.indexOf("?"); if (i>0) {url=url.substring(0, i); i = url.indexOf("/ntopic/");} if (i<0) { return; } url = url.substring(0, i+1); url = url+"livehelp/?pluginID="+a; window.location.href = url; (from the IBM Team Concert website)! [callout: computes a prefix of the URL string that lies within the host path, and appends the constant string livehelp/?pluginID=] 47 1st experiment: comparison with taint analysis 48 compared against commercial taint analysis (used in AppScan Std V8.5); entire set of webpages; manual classification of results by professional ethical hacker (from IBM application security team): TP/FP 1st experiment: results (stacked bar chart of TPs and FPs, y-axis ticks 0, 2000, 4000, 6000, 8000) JSA: 2,639 TPs, 301 FPs; taint analysis: 2,639 TPs, 4,443 FPs; 90% reduction! 49 evaluation — FPs? 
function changeZipRedirect(zipCodeRedirect) { var currURL = document.location.href; ...; wcmContext = currURL.split(’WCM GLOBAL CONTEXT’); var redirectStr = wcmContext[1]; ...; if (redirectStr .match(”pmapmc=”) == null) { /* redirect to the zipcode page */ document.location.href = zipCodeRedirect + ”&redirectURL” + redirectStr; } } (from the Alltel website)! 50 evaluation — FPs? function changeZipRedirect(zipCodeRedirect) { var currURL = document.location.href; ...; wcmContext = currURL.split(’WCM GLOBAL CONTEXT’); var redirectStr = wcmContext[1]; ...; if (redirectStr .match(”pmapmc=”) == null) { /* redirect to the zipcode page */ document.location.href = zipCodeRedirect + ”&redirectURL” + redirectStr; } } (from the Alltel website)! 51 evaluation — FPs? function changeZipRedirect(zipCodeRedirect) { var currURL = document.location.href; ...; wcmContext = currURL.split(’WCM GLOBAL CONTEXT’); var redirectStr = wcmContext[1]; ...; [callout: unresolved constant!] if (redirectStr .match(”pmapmc=”) == null) { /* redirect to the zipcode page */ document.location.href = zipCodeRedirect + ”&redirectURL” + redirectStr; } } (from the Alltel website)! 52 2nd experiment: comparison with black-box testing 53 compared against commercial testing engine (that of AppScan Std V8.6); ~10% (60 / 675) of websites sampled at random; website fragment retrieved by crawler deployed locally for intrusive testing 2nd experiment: results 54 (table columns: configuration | vulnerable websites | false positives) JSA enabled | 33 | 4; JSA disabled | 8 | 0 conclusion JavaScript security: a BIG deal; taint analysis: poor user experience (>60% FPs!); the key: dynamic partial evaluation — high coverage, precision boost with low overhead; JSA: novel form of string analysis — scalable (staged solution atop taint analysis), 90% reduction in FPs! 55 <script> alert(‘thank you!’) </script> 56