Skip to content

Commit

Permalink
JavaScript: Handle E4X/Flow lexical ambiguity.
Browse files Browse the repository at this point in the history
  • Loading branch information
Max Schaefer committed Feb 24, 2019
1 parent d6deefe commit c7e428e
Show file tree
Hide file tree
Showing 3 changed files with 254 additions and 5 deletions.
23 changes: 18 additions & 5 deletions javascript/extractor/src/com/semmle/jcorn/CustomParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -546,12 +546,21 @@ protected XMLAttributeSelector decoratorToAttributeSelector(Decorator d) {

@Override
protected Token readToken(int code) {
// skip XML processing instructions (which are allowed in E4X, but not in JSX)
// skip XML processing instructions (which are allowed in E4X, but not in JSX);
// there is a lexical ambiguity between an XML processing instruction starting a
// chunk of E4X content and a Flow type annotation (both can start with `<?`);
// hence, if we can't find the closing `?>` of a putative XML processing instruction,
// we backtrack and try lexing as something else
if (this.options.e4x()) {
while (code == '<') {
if (charAt(this.pos+1) == '?') {
int oldPos = this.pos;
this.pos += 2;
jsx_readUntil("?>");
if (!jsx_readUntil("?>")) {
// didn't find a closing `?>`, so backtrack
this.pos = oldPos;
break;
}
} else {
break;
}
Expand All @@ -564,7 +573,10 @@ protected Token readToken(int code) {

@Override
protected Either<Integer, Token> jsx_readChunk(StringBuilder out, int chunkStart, int ch) {
// skip XML comments, processing instructions and CDATA (which are allowed in E4X, but not in JSX)
// skip XML comments, processing instructions and CDATA (which are allowed in E4X,
// but not in JSX);
// unlike in `readToken` above, we know that we're inside JSX/E4X code, so there is
// no ambiguity with Flow type annotations
if (this.options.e4x() && ch == '<') {
if (inputSubstring(this.pos+1, this.pos+4).equals("!--")) {
out.append(inputSubstring(chunkStart, this.pos));
Expand All @@ -589,15 +601,16 @@ protected Either<Integer, Token> jsx_readChunk(StringBuilder out, int chunkStart
return super.jsx_readChunk(out, chunkStart, ch);
}

private void jsx_readUntil(String terminator) {
private boolean jsx_readUntil(String terminator) {
char fst = terminator.charAt(0);
while (this.pos+terminator.length() <= this.input.length()) {
if (charAt(this.pos) == fst &&
inputSubstring(this.pos, this.pos+terminator.length()).equals(terminator)) {
this.pos += terminator.length();
break;
return true;
}
++this.pos;
}
return false;
}
}
8 changes: 8 additions & 0 deletions javascript/extractor/tests/flow/input/ambig.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
declare module "test" {
declare type Foo = {|
|};

declare type Bar = {|
baz: () => Promise<?Foo>
|};
}
228 changes: 228 additions & 0 deletions javascript/extractor/tests/flow/output/trap/ambig.js.trap
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
#10000=@"/ambig.js;sourcefile"
files(#10000,"/ambig.js","ambig","js",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=@"global_scope"
scopes(#20000,0)
#20001=@"script;{#10000},1,1"
#20002=*
lines(#20002,#20001,"declare module ""test"" {","
")
#20003=@"loc,{#10000},1,1,1,23"
locations_default(#20003,#10000,1,1,1,23)
hasLocation(#20002,#20003)
#20004=*
lines(#20004,#20001," declare type Foo = {|","
")
#20005=@"loc,{#10000},2,1,2,23"
locations_default(#20005,#10000,2,1,2,23)
hasLocation(#20004,#20005)
indentation(#10000,2," ",2)
#20006=*
lines(#20006,#20001," |};","
")
#20007=@"loc,{#10000},3,1,3,5"
locations_default(#20007,#10000,3,1,3,5)
hasLocation(#20006,#20007)
indentation(#10000,3," ",2)
#20008=*
lines(#20008,#20001,"","
")
#20009=@"loc,{#10000},4,1,4,0"
locations_default(#20009,#10000,4,1,4,0)
hasLocation(#20008,#20009)
#20010=*
lines(#20010,#20001," declare type Bar = {|","
")
#20011=@"loc,{#10000},5,1,5,23"
locations_default(#20011,#10000,5,1,5,23)
hasLocation(#20010,#20011)
indentation(#10000,5," ",2)
#20012=*
lines(#20012,#20001," baz: () => Promise<?Foo>","
")
#20013=@"loc,{#10000},6,1,6,28"
locations_default(#20013,#10000,6,1,6,28)
hasLocation(#20012,#20013)
indentation(#10000,6," ",4)
#20014=*
lines(#20014,#20001," |};","
")
#20015=@"loc,{#10000},7,1,7,5"
locations_default(#20015,#10000,7,1,7,5)
hasLocation(#20014,#20015)
indentation(#10000,7," ",2)
#20016=*
lines(#20016,#20001,"}","
")
#20017=@"loc,{#10000},8,1,8,1"
locations_default(#20017,#10000,8,1,8,1)
hasLocation(#20016,#20017)
numlines(#20001,8,7,0)
#20018=*
tokeninfo(#20018,6,#20001,0,"declare")
#20019=@"loc,{#10000},1,1,1,7"
locations_default(#20019,#10000,1,1,1,7)
hasLocation(#20018,#20019)
#20020=*
tokeninfo(#20020,6,#20001,1,"module")
#20021=@"loc,{#10000},1,9,1,14"
locations_default(#20021,#10000,1,9,1,14)
hasLocation(#20020,#20021)
#20022=*
tokeninfo(#20022,4,#20001,2,"""test""")
#20023=@"loc,{#10000},1,16,1,21"
locations_default(#20023,#10000,1,16,1,21)
hasLocation(#20022,#20023)
#20024=*
tokeninfo(#20024,8,#20001,3,"{")
#20025=@"loc,{#10000},1,23,1,23"
locations_default(#20025,#10000,1,23,1,23)
hasLocation(#20024,#20025)
#20026=*
tokeninfo(#20026,6,#20001,4,"declare")
#20027=@"loc,{#10000},2,3,2,9"
locations_default(#20027,#10000,2,3,2,9)
hasLocation(#20026,#20027)
#20028=*
tokeninfo(#20028,6,#20001,5,"type")
#20029=@"loc,{#10000},2,11,2,14"
locations_default(#20029,#10000,2,11,2,14)
hasLocation(#20028,#20029)
#20030=*
tokeninfo(#20030,6,#20001,6,"Foo")
#20031=@"loc,{#10000},2,16,2,18"
locations_default(#20031,#10000,2,16,2,18)
hasLocation(#20030,#20031)
#20032=*
tokeninfo(#20032,8,#20001,7,"=")
#20033=@"loc,{#10000},2,20,2,20"
locations_default(#20033,#10000,2,20,2,20)
hasLocation(#20032,#20033)
#20034=*
tokeninfo(#20034,8,#20001,8,"{|")
#20035=@"loc,{#10000},2,22,2,23"
locations_default(#20035,#10000,2,22,2,23)
hasLocation(#20034,#20035)
#20036=*
tokeninfo(#20036,8,#20001,9,"|}")
#20037=@"loc,{#10000},3,3,3,4"
locations_default(#20037,#10000,3,3,3,4)
hasLocation(#20036,#20037)
#20038=*
tokeninfo(#20038,8,#20001,10,";")
#20039=@"loc,{#10000},3,5,3,5"
locations_default(#20039,#10000,3,5,3,5)
hasLocation(#20038,#20039)
#20040=*
tokeninfo(#20040,6,#20001,11,"declare")
#20041=@"loc,{#10000},5,3,5,9"
locations_default(#20041,#10000,5,3,5,9)
hasLocation(#20040,#20041)
#20042=*
tokeninfo(#20042,6,#20001,12,"type")
#20043=@"loc,{#10000},5,11,5,14"
locations_default(#20043,#10000,5,11,5,14)
hasLocation(#20042,#20043)
#20044=*
tokeninfo(#20044,6,#20001,13,"Bar")
#20045=@"loc,{#10000},5,16,5,18"
locations_default(#20045,#10000,5,16,5,18)
hasLocation(#20044,#20045)
#20046=*
tokeninfo(#20046,8,#20001,14,"=")
#20047=@"loc,{#10000},5,20,5,20"
locations_default(#20047,#10000,5,20,5,20)
hasLocation(#20046,#20047)
#20048=*
tokeninfo(#20048,8,#20001,15,"{|")
#20049=@"loc,{#10000},5,22,5,23"
locations_default(#20049,#10000,5,22,5,23)
hasLocation(#20048,#20049)
#20050=*
tokeninfo(#20050,6,#20001,16,"baz")
#20051=@"loc,{#10000},6,5,6,7"
locations_default(#20051,#10000,6,5,6,7)
hasLocation(#20050,#20051)
#20052=*
tokeninfo(#20052,8,#20001,17,":")
#20053=@"loc,{#10000},6,8,6,8"
locations_default(#20053,#10000,6,8,6,8)
hasLocation(#20052,#20053)
#20054=*
tokeninfo(#20054,8,#20001,18,"(")
#20055=@"loc,{#10000},6,10,6,10"
locations_default(#20055,#10000,6,10,6,10)
hasLocation(#20054,#20055)
#20056=*
tokeninfo(#20056,8,#20001,19,")")
#20057=@"loc,{#10000},6,11,6,11"
locations_default(#20057,#10000,6,11,6,11)
hasLocation(#20056,#20057)
#20058=*
tokeninfo(#20058,8,#20001,20,"=>")
#20059=@"loc,{#10000},6,13,6,14"
locations_default(#20059,#10000,6,13,6,14)
hasLocation(#20058,#20059)
#20060=*
tokeninfo(#20060,6,#20001,21,"Promise")
#20061=@"loc,{#10000},6,16,6,22"
locations_default(#20061,#10000,6,16,6,22)
hasLocation(#20060,#20061)
#20062=*
tokeninfo(#20062,8,#20001,22,"<")
#20063=@"loc,{#10000},6,23,6,23"
locations_default(#20063,#10000,6,23,6,23)
hasLocation(#20062,#20063)
#20064=*
tokeninfo(#20064,8,#20001,23,"?")
#20065=@"loc,{#10000},6,24,6,24"
locations_default(#20065,#10000,6,24,6,24)
hasLocation(#20064,#20065)
#20066=*
tokeninfo(#20066,6,#20001,24,"Foo")
#20067=@"loc,{#10000},6,25,6,27"
locations_default(#20067,#10000,6,25,6,27)
hasLocation(#20066,#20067)
#20068=*
tokeninfo(#20068,8,#20001,25,">")
#20069=@"loc,{#10000},6,28,6,28"
locations_default(#20069,#10000,6,28,6,28)
hasLocation(#20068,#20069)
#20070=*
tokeninfo(#20070,8,#20001,26,"|}")
#20071=@"loc,{#10000},7,3,7,4"
locations_default(#20071,#10000,7,3,7,4)
hasLocation(#20070,#20071)
#20072=*
tokeninfo(#20072,8,#20001,27,";")
#20073=@"loc,{#10000},7,5,7,5"
locations_default(#20073,#10000,7,5,7,5)
hasLocation(#20072,#20073)
#20074=*
tokeninfo(#20074,8,#20001,28,"}")
hasLocation(#20074,#20017)
#20075=*
tokeninfo(#20075,0,#20001,29,"")
#20076=@"loc,{#10000},9,1,9,0"
locations_default(#20076,#10000,9,1,9,0)
hasLocation(#20075,#20076)
toplevels(#20001,0)
#20077=@"loc,{#10000},1,1,9,0"
locations_default(#20077,#10000,1,1,9,0)
hasLocation(#20001,#20077)
#20078=*
entry_cfg_node(#20078,#20001)
#20079=@"loc,{#10000},1,1,1,0"
locations_default(#20079,#10000,1,1,1,0)
hasLocation(#20078,#20079)
#20080=*
exit_cfg_node(#20080,#20001)
hasLocation(#20080,#20076)
successor(#20078,#20080)
numlines(#10000,8,7,0)
filetype(#10000,"javascript")

0 comments on commit c7e428e

Please sign in to comment.