{
(* (c) Microsoft Corporation 2005-2006.  *)
  
open Fslexast
open Fslexpars
open Lexing
let escape c = 
  match c with
  | '\\' -> '\\'
  | '\'' -> '\''
  | 'n' -> '\n'
  | 't' -> '\t'
  | 'b' -> '\b'
  | 'r' -> '\r'
  | c -> c

let unexpected_char lexbuf =
  failwith ("Unexpected character '"^(Lexing.lexeme lexbuf)^"'")

let digit d = 
  if d >= '0' && d <= '9' then Char.code d - Char.code '0'   
  else failwith "digit" 

let hexdigit d = 
  if d >= '0' && d <= '9' then digit d 
  else if d >= 'a' && d <= 'f' then Char.code d - Char.code 'a' + 10
  else if d >= 'A' && d <= 'F' then Char.code d - Char.code 'A' + 10
  else failwith "hexdigit" 

let trigraph c1 c2 c3 =
  Char.chr (digit c1 * 100 + digit c2 * 10 + digit c3)

let hexgraph c1 c2 =
  Char.chr (hexdigit c1 * 16 + hexdigit c2)

let inc_lnum bol pos = 
  let lnum = pos.Lexing.pos_lnum in 
  {pos with Lexing.pos_lnum =  lnum+1; Lexing.pos_bol = bol }

let newline lexbuf = 
  (*IF-FSHARP Lexing.lexbuf_set_curr_p lexbuf ENDIF-FSHARP*)
  (*IF-OCAML*) lexbuf.Lexing.lex_curr_p <- (*ENDIF-OCAML*)
    ( inc_lnum (Lexing.lexeme_end lexbuf) (Lexing.lexeme_end_p lexbuf))

} 

let letter = ['A'-'Z'] | ['a'-'z']
let digit = ['0'-'9']
let whitespace = [' ' '\t']
let char = '\'' ( [^'\\'] | ('\\' ( '\\' | '\'' | "\"" | 'n' | 't' | 'b' | 'r'))) '\''
let hex = ['0'-'9'] | ['A'-'F'] | ['a'-'f']
let hexgraph = '\\' 'x' hex hex
let trigraph = '\\' digit digit digit
let newline = ('\n' | '\r' '\n')
let ident_start_char = letter       
let ident_char = ( ident_start_char| digit | ['\'' '_'] )
let ident = ident_start_char ident_char*

rule token = parse
 | "rule" {RULE }
 | "parse" {PARSE }
 | "eof" {EOF }
 | "let" {LET }
 | "and" {AND }
 | char
   { let s = lexeme lexbuf in 
     CHAR (if s.[1] = '\\' then escape s.[2] else s.[1])  }
 | '\'' trigraph '\''
   { let s = lexeme lexbuf in 
     CHAR (trigraph s.[2] s.[3] s.[4]) }
 | '\'' hexgraph '\''
   { let s = lexeme lexbuf in 
     CHAR (hexgraph s.[3] s.[4]) }
 | '{' { let p = lexeme_start_p lexbuf in code p (Buffer.create 100) lexbuf }
 | '"' { string  (lexeme_start_p lexbuf) (Buffer.create 100) lexbuf }
 | whitespace+  { token lexbuf }
 | newline { newline lexbuf; token lexbuf }
 | ident_start_char ident_char* { IDENT (lexeme lexbuf) }
 | '|' { BAR }
 | '.' { DOT }
 | '+' { PLUS }
 | '*' { STAR }
 | '?' { QMARK }
 | '=' { EQUALS }
 | '[' { LBRACK }
 | ']' { RBRACK }
 | '(' { LPAREN }
 | ')' { RPAREN }
 | '_' { UNDERSCORE }
 | '^' { HAT }
 | '-' { DASH }
 | "(*" { ignore(comment (lexeme_start_p lexbuf) lexbuf); token lexbuf }
 | "//" [^'\n''\r']* {  token lexbuf  }
 | _ { unexpected_char lexbuf }     
 | eof { EOF  }                                     
and string p buff = parse
 |  '\\' newline { newline lexbuf; string p buff lexbuf }
 |  '\\' ( '"' | '\\' | '\'' | 'n' | 't' | 'b' | 'r')
   { Buffer.add_char buff (escape (lexeme_char lexbuf 1));
     string p buff lexbuf } 
 | trigraph
   { let s = lexeme lexbuf in 
     Buffer.add_char buff (trigraph s.[1] s.[2] s.[3]);
     string p buff lexbuf  }
 | '"' { STRING (Buffer.contents buff) }
 | newline { newline lexbuf; 
             Buffer.add_string buff "\n"; 
             string p buff lexbuf }
 | (whitespace | letter | digit) +  
   { Buffer.add_string buff (lexeme lexbuf); 
     string p buff lexbuf }
 | eof { failwith (Printf.sprintf "end of file in string started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol))  }
 | _ { Buffer.add_char buff (lexeme_char lexbuf 0); 
       string p buff lexbuf }
and code p buff = parse
 | "}" { CODE (Buffer.contents buff, p) }
 | "{" { Buffer.add_string buff (lexeme lexbuf); 
         ignore(code p buff lexbuf); 
         Buffer.add_string buff "}"; 
         code p buff lexbuf }
 |  '\\' ('"' | '\\')
   { Buffer.add_string buff (lexeme lexbuf); 
     code p buff lexbuf } 
 | "\"" { Buffer.add_string buff (lexeme lexbuf); 
          ignore(codestring buff lexbuf); 
          code p buff lexbuf }
 | newline { newline lexbuf; 
             Buffer.add_string buff "\n"; 
             code p buff lexbuf }
 | (whitespace | letter | digit) +  
   { Buffer.add_string buff (lexeme lexbuf); 
     code p buff lexbuf }
 | eof { EOF }
 | _ { Buffer.add_char buff (lexeme_char lexbuf 0); 
       code p buff lexbuf }

and codestring buff = parse
 |  '\\' ('"' | '\\')
   { Buffer.add_string buff (lexeme lexbuf); 
     codestring buff lexbuf } 
 | '"' { Buffer.add_string buff (lexeme lexbuf); 
         Buffer.contents buff }
 | newline { newline lexbuf; 
             Buffer.add_string buff "\n"; 
             codestring buff lexbuf }
 | (whitespace | letter | digit) +  
   { Buffer.add_string buff (lexeme lexbuf); 
     codestring buff lexbuf }
 | eof { failwith "unterminated string in code" }
 | _ { Buffer.add_char buff (lexeme_char lexbuf 0); 
       codestring buff lexbuf }

and comment p = parse
 |  char { comment p lexbuf } 
 | '"' { ignore(try string (lexeme_start_p lexbuf) (Buffer.create 100) lexbuf 
                with Failure s -> failwith (s ^ "\n" ^ Printf.sprintf "error while processing string nested in comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol))); 
         comment p lexbuf }
 | "(*" { ignore(try comment p lexbuf with Failure s -> failwith (s ^ "\n" ^ Printf.sprintf "error while processing nested comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol))); 
          comment p lexbuf }
 | newline { newline lexbuf; comment p lexbuf }
 | "*)" { () }
 | eof { failwith (Printf.sprintf "end of file in comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol))  }
 | [^ '\'' '(' '*' '\n' '\r' '"' ')' ]+  { comment p lexbuf }
 | _  { comment p lexbuf }
