pdf

hybrid security analysis of web
JavaScript code via dynamic
partial evaluation
Omer Tripp
Pietro Ferrara
Marco Pistoia
IBM Research, NY
Work published at the ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA 2014)
Recipient of the ACM SIGSOFT Distinguished Paper Award
1
web client-side code*
38%
30%
23%
15%
25%
30%
8%
0%
5%
9 years ago
4 years ago
* data due to IBM application security research team
2
today
client-side vulnerabilities*
DOM-based XSS
var pos = document.location.href.indexOf("name=");
document.write(document.URL.substring(pos, document.URL.length));
open redirect
var pos = document.location.href.indexOf("target=");
var val = document.location.href.substring(pos);
document.location.href = "http://" + val;
>15% vulnerable to these attacks!
* data due to IBM application security research team
3
reflected XSS
web app!
script reflected into
HTML response without
proper encoding
attacker’s evil script
executed using victim’s credentials
link embedded
with evil script
attacker!
4!
victim!
DOM-based XSS
ibm.com!
NO reflection into
HTML response
evil script NOT
sent to server
www.ibm.com/index.html?
name=<script>…</script>
attacker!
5
victim!
client-side vulnerabilities*
DOM-based XSS
var pos = document.location.href.indexOf("name=");
document.write(document.URL.substring(pos, document.URL.length));
open redirect
var pos = document.location.href.indexOf("target=");
var val = document.location.href.substring(pos);
document.location.href = "http://" + val;
>15% vulnerable to these attacks!
* data due to IBM application security research team
6
open redirect
ibm.com!
attacker.com!
www.ibm.com/index.html?target=attacker.com
attacker!
7
victim!
client-side vulnerabilities*
DOM-based XSS
var pos = document.location.href.indexOf("name=");
document.write(document.URL.substring(pos, document.URL.length));
open redirect
var pos = document.location.href.indexOf("target=");
var val = document.location.href.substring(pos);
document.location.href = "http://" + val;
>15% vulnerable to these attacks!
* data due to IBM application security research team
8
JavaScript complexities
9!
JavaScript complexities
eval and its relatives!
eval("document.write('evil')");
10!
JavaScript complexities
reflective property access!
var a = "foo" + "bar";
var b = obj[a];
11!
JavaScript complexities
arguments array!
bar() { if (arguments.length > 3) foo(arguments[2]); }
bar(1, "x", 3)
12!
JavaScript complexities
prototype-chain property lookup!
function F() { this.f = document.location; }
function G() { }
G.prototype = new F(); var g = new G(); write(g.f);
13!
JavaScript complexities
function pointers!
var m = function() { … }
var k = function(f) { f(); }
k(m);
14!
JavaScript complexities
lexical scoping!
function foo() {
var y = 42;
var bar = function() { write(y); } }
15!
JavaScript complexities
…!
16!
motivating example
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
17
motivating example
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
18
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
19
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
20
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
21
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
22
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
23
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
BOOM?!
(real-world JavaScript code from the Alcatel-Lucent website)!
24
dynamic partial evaluation
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length, str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
25
dynamic partial evaluation
var search_term = ‘login.html’;
var str = “http://x.com/login.html?p1=v1”; // replaces: document.location
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = “http://x.com/”; // replaces: str.substring(0, url_check)
result = “http://x.com/login.jsp?p1=v1”; // replaces: result + ‘login.jsp’ + str.substring(url_check + search_term.length, str.length)
document.url = result;
}
(real-world JavaScript code from the Alcatel-Lucent website)!
26
dynamic partial evaluation
var search_term = ‘login.html’;
var str = “http://x.com/login.html?p1=v1”;
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = “http://x.com/”;
result= “http://x.com/login.jsp?p1=v1”;
document.url = result;
}
(real-world JavaScript code from the Alcatel-Lucent website)!
27
our hybrid approach
Dynamic Oracle!
•  Crawls Web site!
•  Collects dynamic information!
•  Links references to the DOM with partially concretized values!
GET http://x.com/login.html?p1=v1!
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
http://x.com/login.html?p1=v1!
var search_term = ‘login.html’;
var str = “http://x.com/login.html?p1=v1”;
var url_check = str.indexOf(search_term);
…
Traditional Static Taint Analysis!
•  Looks for flows from sources to sinks!
28
Static String Analysis!
•  Determines which parts of a string are beyond user control!
•  Leverages string information for better classification of findings!
our hybrid approach
Dynamic Oracle!
•  Crawls Web site!
•  Collects dynamic information!
•  Links references to the DOM with partially concretized values!
GET http://x.com/login.html?p1=v1!
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
http://x.com/login.html?p1=v1!
accuracy!
var search_term = ‘login.html’;
var str = “http://x.com/login.html?p1=v1”;
var url_check = str.indexOf(search_term);
…
Traditional Static Taint Analysis!
•  Looks for flows from sources to sinks!
29
Static String Analysis!
•  Determines which parts of a string are beyond user control!
•  Leverages string information for better classification of findings!
our hybrid approach
Dynamic Oracle!
•  Crawls Web site!
•  Collects dynamic information!
•  Links references to the DOM with partially concretized values!
GET http://x.com/login.html?p1=v1!
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
http://x.com/login.html?p1=v1!
coverage!
var search_term = ‘login.html’;
var str = “http://x.com/login.html?p1=v1”;
var url_check = str.indexOf(search_term);
…
Traditional Static Taint Analysis!
•  Looks for flows from sources to sinks!
30
Static String Analysis!
•  Determines which parts of a string are beyond user control!
•  Leverages string information for better classification of findings!
our hybrid approach
Dynamic Oracle!
•  Crawls Web site!
•  Collects dynamic information!
•  Links references to the DOM with partially concretized values!
GET http://x.com/login.html?p1=v1!
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
http://x.com/login.html?p1=v1!
lightweight!
var search_term = ‘login.html’;
var str = “http://x.com/login.html?p1=v1”;
var url_check = str.indexOf(search_term);
…
Traditional Static Taint Analysis!
•  Looks for flows from sources to sinks!
31
Static String Analysis!
•  Determines which parts of a string are beyond user control!
•  Leverages string information for better classification of findings!
static analysis: JSA — intuition
http://x.com/login.html?p1=v1
document.location
system-controlled
prefix
http://x.com/login.html?
32
attacker-controlled
suffix
.*
static analysis: JSA — domain
var str = document.location;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
33
static analysis: JSA — domain
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
34
static analysis: JSA — domain
Prx
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
35
{str},HTTP://X.Com/login.html?,T
Idx
static analysis: JSA — domain
Prx
var str = “HTTP://X.Com/login.html?p1=v1”;
{str},HTTP://X.Com/login.html?,T
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
36
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
Idx
static analysis: JSA — domain
Prx
var str = “HTTP://X.Com/login.html?p1=v1”;
Idx
{str},HTTP://X.Com/login.html?,T
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
var tmp = str.substring(0,n);
document.location.href = tmp;
}
37
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
{n: 13}
static analysis: JSA — domain
Prx
var str = “HTTP://X.Com/login.html?p1=v1”;
Idx
{str},HTTP://X.Com/login.html?,T
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
var tmp = str.substring(0,n);
document.location.href = tmp;
}
38
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
{n: 13}
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
…………………………………………………………………………………………………
{tmp},HTTP://X.Com/,F
{n: 13}
…………………
static analysis: JSA — domain
Prx
var str = “HTTP://X.Com/login.html?p1=v1”;
Idx
{str},HTTP://X.Com/login.html?,T
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
var tmp = str.substring(0,n);
document.location.href = tmp;
}
39
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
{n: 13}
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
…………………………………………………………………………………………………
{tmp},HTTP://X.Com/,F
{n: 13}
…………………
static analysis: JSA —
transformers
Prx
var str = “HTTP://X.Com/login.html?p1=v1”;
{str},HTTP://X.Com/login.html?,T
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
40
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
Idx
static analysis: JSA —
transformers
Prx
var str = “HTTP://X.Com/login.html?p1=v1”;
Idx
{str},HTTP://X.Com/login.html?,T
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
var tmp = str.substring(0,n);
document.location.href = tmp;
}
41
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
{n: 13}
static analysis: JSA —
transformers
Prx
var str = “HTTP://X.Com/login.html?p1=v1”;
Idx
{str},HTTP://X.Com/login.html?,T
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
var tmp = str.substring(0,n);
document.location.href = tmp;
}
42
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
{n: 13}
{str},HTTP://X.Com/login.html?,T
{lstr},http://x.com/login.html?,T
…………………………………………………………………………………………………
{tmp},HTTP://X.Com/,F
{n: 13}
…………………
note on implementation
 
JSA written atop the WALA framework
 
 
optimization: staged analysis
 
 
 
43
IFDS problem / distributive analysis
1st stage: taint analysis
2nd stage: JSA applied to source/sink pairs not
eliminated by taint analysis
featured in IBM Security AppScan Standard Edition
(AppScan Std) V8.6
evaluation — benchmarks
 
 
44
170,000 webpages / 675 websites:
 
Fortune 500
 
top 100 (www.web100.com)
 
handpicked security and IT websites
up to 500 pages per site via nonintrusive crawling
 
no login
 
only link crawling
evaluation — benchmarks
var pageUrl = window.location;
var cId =
document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value;
var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl;
openPopupWindow(url);
(from the Corning website)!
var url = window.location.href;
var i = url.indexOf("?");
if (i>0) {url=url.substring(0, i); i = url.indexOf("/ntopic/");}
if (i<0) { return; }
url = url.substring(0, i+1);
url = url+"livehelp/?pluginID="+a;
window.location.href = url;
(from the IBM Team Concert website)!
45
evaluation — benchmarks
var pageUrl = window.location;
var cId =
document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value;
var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl;
openPopupWindow(url);
(from the Corning website)!
Both cId and pageUrl flow into the
target URL’s query string, after the ‘?’
var url = window.location.href;
var i = url.indexOf("?");
if (i>0) {url=url.substring(0, i); i = url.indexOf("/ntopic/");}
if (i<0) { return; }
url = url.substring(0, i+1);
url = url+"livehelp/?pluginID="+a;
window.location.href = url;
(from the IBM Team Concert website)!
46
evaluation — benchmarks
var pageUrl = window.location;
var cId =
document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value;
var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl;
openPopupWindow(url);
(from the Corning website)!
Both cId and pageUrl flow into the
target URL’s query string, after the ‘?’
var url = window.location.href;
var i = url.indexOf("?");
if (i>0) {url=url.substring(0, i); i = url.indexOf("/ntopic/");}
if (i<0) { return; }
url = url.substring(0, i+1);
url = url+"livehelp/?pluginID="+a;
window.location.href = url;
(from the IBM Team Concert website)!
47
Computes a prefix of the URL string that
lies within the host path, and appends the
constant string livehelp/?pluginID=
1st experiment: comparison with
taint analysis
 
 
 
48
compared against commercial taint analysis
(used in AppScan Std V8.5)
entire set of webpages
manual classification of results by
professional ethical hacker (from IBM
application security team): TP/FP
1st experiment: results
(bar chart; y-axis: number of findings, 0–8000)
JSA: 2,639 TPs, 301 FPs
taint analysis: 2,639 TPs, 4,443 FPs
90% reduction in FPs!
49
evaluation — FPs?
function changeZipRedirect(zipCodeRedirect) {
var currURL = document.location.href; ...;
wcmContext = currURL.split('WCM GLOBAL CONTEXT');
var redirectStr = wcmContext[1]; ...;
if (redirectStr.match("pmapmc=") == null) {
/* redirect to the zipcode page */
document.location.href =
zipCodeRedirect + "&redirectURL"
+ redirectStr; } }
(from the Alltel website)!
50
evaluation — FPs?
function changeZipRedirect(zipCodeRedirect) {
var currURL = document.location.href; ...;
wcmContext = currURL.split('WCM GLOBAL CONTEXT');
var redirectStr = wcmContext[1]; ...;
if (redirectStr.match("pmapmc=") == null) {
/* redirect to the zipcode page */
document.location.href =
zipCodeRedirect + "&redirectURL"
+ redirectStr; } }
(from the Alltel website)!
51
evaluation — FPs?
function changeZipRedirect(zipCodeRedirect) {
var currURL = document.location.href; ...;
wcmContext = currURL.split('WCM GLOBAL CONTEXT');
var redirectStr = wcmContext[1]; ...;
unresolved constant!
if (redirectStr.match("pmapmc=") == null) {
/* redirect to the zipcode page */
document.location.href =
zipCodeRedirect + "&redirectURL"
+ redirectStr; } }
(from the Alltel website)!
52
2nd experiment: comparison
with black-box testing
 
 
 
53
compared against commercial testing engine
(that of AppScan Std V8.6)
~10% (60 / 675) of websites sampled at
random
website fragment retrieved by crawler
deployed locally for intrusive testing
2nd experiment: results
54
configuration | vulnerable websites | false positives
JSA enabled | 33 | 4
JSA disabled | 8 | 0
conclusion
 
JavaScript security: a BIG deal
 
taint analysis: poor user experience (>60% FPs!)
 
the key: dynamic partial evaluation
 
55
 
high coverage
 
precision boost with low overhead
JSA: novel form of string analysis
 
scalable (staged solution atop taint analysis)
 
90% reduction in FPs!
<script>
alert(‘thank you!’)
</script>
56