diff -uNr fxp-2.0.orig/src/Apps/Canon/canon.cm fxp-2.0/src/Apps/Canon/canon.cm --- fxp-2.0.orig/src/Apps/Canon/canon.cm Sat Jun 26 02:42:59 2004 +++ fxp-2.0/src/Apps/Canon/canon.cm Thu Nov 1 08:58:57 2007 @@ -5,3 +5,6 @@ canonHooks.sml canon.sml ../../fxlib.cm +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif diff -uNr fxp-2.0.orig/src/Apps/Canon/canon.mlb fxp-2.0/src/Apps/Canon/canon.mlb --- fxp-2.0.orig/src/Apps/Canon/canon.mlb Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Canon/canon.mlb Thu Nov 1 08:58:57 2007 @@ -0,0 +1,16 @@ +ann + "nonexhaustiveMatch warn" + "sequenceNonUnit warn" +in + local + $(MLTON_ROOT)/basis/basis.mlb + ../../fxlib.mlb + in + canonOptions.sml + canonEncode.sml + canonOutput.sml + canonHooks.sml + canon.sml + runcanon.sml + end +end diff -uNr fxp-2.0.orig/src/Apps/Canon/canonprog.sml fxp-2.0/src/Apps/Canon/canonprog.sml --- fxp-2.0.orig/src/Apps/Canon/canonprog.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Canon/canonprog.sml Thu Nov 1 19:34:29 2007 @@ -0,0 +1,4 @@ +structure CanonProg = struct + fun main () = ignore (Canon.canon + (CommandLine.name(),CommandLine.arguments())) +end diff -uNr fxp-2.0.orig/src/Apps/Canon/runcanon.sml fxp-2.0/src/Apps/Canon/runcanon.sml --- fxp-2.0.orig/src/Apps/Canon/runcanon.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Canon/runcanon.sml Thu Nov 1 08:58:57 2007 @@ -0,0 +1,2 @@ +val _ = Canon.canon (CommandLine.name(), CommandLine.arguments()) + diff -uNr fxp-2.0.orig/src/Apps/Copy/copy.cm fxp-2.0/src/Apps/Copy/copy.cm --- fxp-2.0.orig/src/Apps/Copy/copy.cm Sat Jun 26 02:43:00 2004 +++ fxp-2.0/src/Apps/Copy/copy.cm Thu Nov 1 08:58:54 2007 @@ -1,5 +1,10 @@ Group is +#if (SMLNJ_MINOR_VERSION > 40) copyEncode.sml + $/basis.cm +#else + copyEncode.orig.sml +#endif copyOptions.sml copyOutput.sml copyHooks.sml diff -uNr fxp-2.0.orig/src/Apps/Copy/copy.mlb fxp-2.0/src/Apps/Copy/copy.mlb --- fxp-2.0.orig/src/Apps/Copy/copy.mlb Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Copy/copy.mlb Thu Nov 1 08:58:55 2007 @@ -0,0 +1,16 @@ +ann + "nonexhaustiveMatch warn" + "sequenceNonUnit warn" +in + local + $(MLTON_ROOT)/basis/basis.mlb + ../../fxlib.mlb + in + copyOptions.sml + copyEncode.sml + copyOutput.sml + copyHooks.sml + copy.sml + runcopy.sml + end +end diff -uNr fxp-2.0.orig/src/Apps/Copy/copyEncode.orig.sml fxp-2.0/src/Apps/Copy/copyEncode.orig.sml --- fxp-2.0.orig/src/Apps/Copy/copyEncode.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Copy/copyEncode.orig.sml Thu Nov 1 08:58:55 2007 @@ -0,0 +1,135 @@ +signature CopyEncode = + sig + type File + + val noFile : File + val openFile : string * Encoding.Encoding * string -> File + val closeFile : File -> unit + + val putBlank : File -> File + val putNl : File -> File + + val putChar : File * UniChar.Char -> File + val putAttChar : File * UniChar.Char -> File + val putDataChar : File * UniChar.Char -> File + + val putData : File * UniChar.Data -> File + val putAttData : File * UniChar.Data -> File + val putDataData : File * UniChar.Data -> File + + val putVector : File * UniChar.Vector -> File + val putAttVector : File * UniChar.Vector -> File + val putDataVector : File * UniChar.Vector -> File + val putEntVector : File * UniChar.Vector -> File + + val putAttValue : File * UniChar.Vector * UniChar.Char -> File + val putEntValue : bool -> File * UniChar.Vector * UniChar.Char -> File + val putString : File * string -> File + + val putCharRef : File * UniChar.Char -> File + val putGenRef : File * UniChar.Data -> File + val putParRef : File * UniChar.Data -> File + end + +functor CopyEncode (structure ParserOptions : ParserOptions) : CopyEncode = + struct + open + CopyOptions Encode ParserOptions UniChar UniClasses UtilError + + fun encodeError err = if !O_SILENT then () else + TextIO.output(!O_ERROR_DEVICE,formatMessage (4,!O_ERROR_LINEWIDTH) + ("Encoding error:"::encodeMessage err)) + + type File = EncFile + val noFile = encNoFile + fun openFile fe = encOpenFile fe + handle NoSuchFile (f,msg) => noFile before + TextIO.output(!O_ERROR_DEVICE,formatMessage (4,!O_ERROR_LINEWIDTH) + ["Cannot open file '"^f^"' for writing. ("^msg^")"]) + val closeFile = encCloseFile + val validChar = encValidChar + + fun putChar(enc,c) = encPutChar(enc,c) + handle EncodeError(f,msg) => encAdapt(enc,f) before encodeError msg + + fun putData(enc,cs) = foldl (fn (c,enc) => putChar(enc,c)) enc cs + fun putVector(enc,cv) = Vector.foldl (fn (c,enc) => putChar(enc,c)) enc cv + + fun putNl f = putChar(f,0wx0A) + fun putBlank f = putChar(f,0wx20) + + val hexDigits = Vector.tabulate(16,fn i => Chars.fromInt((if i<10 then 48 else 55)+i)) + fun hexDigit n = Vector.sub(hexDigits,Chars.toInt n) + fun charRefSeq c = + if c=0wx00 then [0wx26,0wx23,0wx78,0wx30,0wx3b] (* "�" *) + else let fun mk_hex yet n = if n=(0w0:Char) then yet + else mk_hex (hexDigit(n mod 0w16)::yet) (n div 0w16) + in 0wx26::0wx23::0wx78::mk_hex [0wx3b] c + end + fun putCharRef(f,c) = putData(f,charRefSeq c) + fun putGenRef(f,ent) = let val f1 = putChar(f,0wx26) + val f2 = putData(f1,ent) + val f3 = putChar(f2,0wx3b) + in f3 + end + fun putParRef(f,ent) = let val f1 = putChar(f,0wx25) + val f2 = putData(f1,ent) + val f3 = putChar(f2,0wx3b) + in f3 + end + + fun putAttChar(f,c) = + case c + of 0wx26 => putData(f,[0wx26,0wx61,0wx6d,0wx70,0wx3b]) (* "&" *) + | 0wx3C => putData(f,[0wx26,0wx6c,0wx74,0wx3b]) (* "<" *) + | _ => if validChar(f,c) then putChar(f,c) else putCharRef(f,c) + fun putDataChar(f,c) = + case c + of 0wx26 => putData(f,[0wx26,0wx61,0wx6d,0wx70,0wx3b]) (* "&" *) + | 0wx3C => putData(f,[0wx26,0wx6c,0wx74,0wx3b]) (* "<" *) + | 0wx3E => if !O_COMPATIBILITY + then putData(f,[0wx26,0wx67,0wx74,0wx3b]) (* ">" *) + else putChar(f,c) + | _ => if validChar(f,c) then putChar(f,c) else putCharRef(f,c) + + fun putAttData(f,cs) = + foldl (fn (c,f) => putAttChar(f,c)) f cs + fun putDataData(f,cs) = + foldl (fn (c,f) => putDataChar(f,c)) f cs + + fun putAttVector(f,cv) = + Vector.foldl (fn (c,f) => if validChar(f,c) then putChar(f,c) else putCharRef(f,c)) f cv + fun putDataVector(f,cv) = + Vector.foldl (fn (c,f) => putDataChar(f,c)) f cv + fun putEntVector (f,cv) = + Vector.foldl (fn (c,f) => if validChar(f,c) then putChar(f,c) else putCharRef(f,c)) f cv + + fun putAttValue (f,cv,q) = + let + fun putOne(c,f) = + case c + of 0wx26 => putData(f,[0wx26,0wx61,0wx6d,0wx70,0wx3b]) (* "&" *) + | 0wx3C => putData(f,[0wx26,0wx6c,0wx74,0wx3b]) (* "<" *) + | _ => if c<>q andalso validChar(f,c) then putChar(f,c) else putCharRef(f,c) + val f1 = putChar(f,q) + val f2 = Vector.foldl putOne f1 cv + val f3 = putChar(f2,q) + in f3 + end + fun putEntValue escapeParRef (f,cv,q) = + let + fun putOne(i,c,f) = + case c + of 0wx25 => if escapeParRef then putCharRef(f,c) else putChar(f,c) + | 0wx26 => if i+1 if c<>q andalso validChar(f,c) then putChar(f,c) else putCharRef(f,c) + + val f1 = putChar(f,q) + val f2 = Vector.foldli putOne f1 (cv,0,NONE) + val f3 = putChar(f2,q) + in f3 + end + + fun putString(f,str) = putData(f,String2Data str) + end diff -uNr fxp-2.0.orig/src/Apps/Copy/copyEncode.sml fxp-2.0/src/Apps/Copy/copyEncode.sml --- fxp-2.0.orig/src/Apps/Copy/copyEncode.sml Sat Jun 26 02:42:59 2004 +++ fxp-2.0/src/Apps/Copy/copyEncode.sml Fri Nov 2 11:22:24 2007 @@ -126,7 +126,7 @@ | _ => if c<>q andalso validChar(f,c) then putChar(f,c) else putCharRef(f,c) val f1 = putChar(f,q) - val f2 = Vector.foldli putOne f1 (cv,0,NONE) + val f2 = Vector.foldli putOne f1 cv val f3 = putChar(f2,q) in f3 end diff -uNr fxp-2.0.orig/src/Apps/Copy/copyprog.sml fxp-2.0/src/Apps/Copy/copyprog.sml --- fxp-2.0.orig/src/Apps/Copy/copyprog.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Copy/copyprog.sml Thu Nov 1 19:35:13 2007 @@ -0,0 +1,4 @@ +structure CopyProg = struct + fun main () = ignore (Copy.copy + (CommandLine.name(),CommandLine.arguments())) +end diff -uNr fxp-2.0.orig/src/Apps/Copy/runcopy.sml fxp-2.0/src/Apps/Copy/runcopy.sml --- fxp-2.0.orig/src/Apps/Copy/runcopy.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Copy/runcopy.sml Thu Nov 1 08:58:55 2007 @@ -0,0 +1,2 @@ +val _ = Copy.copy (CommandLine.name(), CommandLine.arguments()) + diff -uNr fxp-2.0.orig/src/Apps/Esis/esis.cm fxp-2.0/src/Apps/Esis/esis.cm --- fxp-2.0.orig/src/Apps/Esis/esis.cm Sat Jun 26 02:43:01 2004 +++ fxp-2.0/src/Apps/Esis/esis.cm Thu Nov 1 08:58:55 2007 @@ -5,3 +5,6 @@ esisHooks.sml esisData.sml ../../fxlib.cm +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif diff -uNr fxp-2.0.orig/src/Apps/Esis/esis.mlb fxp-2.0/src/Apps/Esis/esis.mlb --- fxp-2.0.orig/src/Apps/Esis/esis.mlb Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Esis/esis.mlb Thu Nov 1 08:58:56 2007 @@ -0,0 +1,16 @@ +ann + "nonexhaustiveMatch warn" + "sequenceNonUnit warn" +in + local + $(MLTON_ROOT)/basis/basis.mlb + ../../fxlib.mlb + in + esisOptions.sml + esisData.sml + esisOutput.sml + esisHooks.sml + esis.sml + runesis.sml + end +end diff -uNr fxp-2.0.orig/src/Apps/Esis/esisprog.sml fxp-2.0/src/Apps/Esis/esisprog.sml --- fxp-2.0.orig/src/Apps/Esis/esisprog.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Esis/esisprog.sml Thu Nov 1 19:35:37 2007 @@ -0,0 +1,4 @@ +structure EsisProg = struct + fun main () = ignore (Esis.esis + (CommandLine.name(),CommandLine.arguments())) +end diff -uNr fxp-2.0.orig/src/Apps/Esis/runesis.sml fxp-2.0/src/Apps/Esis/runesis.sml --- fxp-2.0.orig/src/Apps/Esis/runesis.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Esis/runesis.sml Thu Nov 1 08:58:56 2007 @@ -0,0 +1,2 @@ +val _ = Esis.esis (CommandLine.name(), CommandLine.arguments()) + diff -uNr fxp-2.0.orig/src/Apps/Null/null.cm fxp-2.0/src/Apps/Null/null.cm --- fxp-2.0.orig/src/Apps/Null/null.cm Sat Jun 26 02:42:58 2004 +++ fxp-2.0/src/Apps/Null/null.cm Thu Nov 1 08:58:56 2007 @@ -4,3 +4,6 @@ null.sml nullHard.sml ../../fxlib.cm +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif diff -uNr fxp-2.0.orig/src/Apps/Null/null.mlb fxp-2.0/src/Apps/Null/null.mlb --- fxp-2.0.orig/src/Apps/Null/null.mlb Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Null/null.mlb Thu Nov 1 08:58:56 2007 @@ -0,0 +1,15 @@ +ann + "nonexhaustiveMatch warn" + "sequenceNonUnit warn" +in + local + $(MLTON_ROOT)/basis/basis.mlb + ../../fxlib.mlb + in + nullOptions.sml + nullHooks.sml + nullHard.sml + null.sml + runnull.sml + end +end diff -uNr fxp-2.0.orig/src/Apps/Null/nullprog.sml fxp-2.0/src/Apps/Null/nullprog.sml --- fxp-2.0.orig/src/Apps/Null/nullprog.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Null/nullprog.sml Thu Nov 1 19:35:58 2007 @@ -0,0 +1,4 @@ +structure NullProg = struct + fun main () = ignore (Null.null + (CommandLine.name(),CommandLine.arguments())) +end diff -uNr fxp-2.0.orig/src/Apps/Null/runnull.sml fxp-2.0/src/Apps/Null/runnull.sml --- fxp-2.0.orig/src/Apps/Null/runnull.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Null/runnull.sml Thu Nov 1 08:58:57 2007 @@ -0,0 +1,2 @@ +val _ = Null.null (CommandLine.name(), CommandLine.arguments()) + diff -uNr fxp-2.0.orig/src/Apps/Viz/runviz.sml fxp-2.0/src/Apps/Viz/runviz.sml --- fxp-2.0.orig/src/Apps/Viz/runviz.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Viz/runviz.sml Thu Nov 1 08:58:54 2007 @@ -0,0 +1,2 @@ +val _ = Viz.viz (CommandLine.name(), CommandLine.arguments()) + diff -uNr fxp-2.0.orig/src/Apps/Viz/viz.cm fxp-2.0/src/Apps/Viz/viz.cm --- fxp-2.0.orig/src/Apps/Viz/viz.cm Sat Jun 26 02:43:01 2004 +++ fxp-2.0/src/Apps/Viz/viz.cm Thu Nov 1 08:58:53 2007 @@ -3,3 +3,6 @@ viz.sml vizHooks.sml ../../fxlib.cm +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif diff -uNr fxp-2.0.orig/src/Apps/Viz/viz.mlb fxp-2.0/src/Apps/Viz/viz.mlb --- fxp-2.0.orig/src/Apps/Viz/viz.mlb Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Viz/viz.mlb Thu Nov 1 08:58:54 2007 @@ -0,0 +1,14 @@ +ann + "nonexhaustiveMatch warn" + "sequenceNonUnit warn" +in + local + $(MLTON_ROOT)/basis/basis.mlb + ../../fxlib.mlb + in + vizOptions.sml + vizHooks.sml + viz.sml + runviz.sml + end +end diff -uNr fxp-2.0.orig/src/Apps/Viz/vizprog.sml fxp-2.0/src/Apps/Viz/vizprog.sml --- fxp-2.0.orig/src/Apps/Viz/vizprog.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Apps/Viz/vizprog.sml Thu Nov 1 19:36:23 2007 @@ -0,0 +1,4 @@ +structure VizProg = struct + fun main () = ignore (Viz.viz + (CommandLine.name(),CommandLine.arguments())) +end diff -uNr fxp-2.0.orig/src/Catalog/catalog.cm fxp-2.0/src/Catalog/catalog.cm --- fxp-2.0.orig/src/Catalog/catalog.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Catalog/catalog.cm Thu Nov 1 08:59:05 2007 @@ -0,0 +1,28 @@ +Group + signature CatError + structure CatError + signature CatOptions + functor CatOptions + signature CatParams + functor ResolveCatalog +is + catData.sml + catDtd.sml + catError.sml + catFile.sml + catHooks.sml + catOptions.sml + catParams.sml + catParse.sml + catResolve.sml + catalog.sml + socatParse.sml + + ../Unicode/unicode.cm + ../Parser/parser.cm + ../Util/util.cm + +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif + diff -uNr fxp-2.0.orig/src/Parser/Base/base.cm fxp-2.0/src/Parser/Base/base.cm --- fxp-2.0.orig/src/Parser/Base/base.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Base/base.cm Thu Nov 1 08:59:01 2007 @@ -0,0 +1,16 @@ +Group + structure Base +is + base.sml + baseData.sml + baseString.sml + + ../Dfa/dfa.cm + ../Error/error.cm + ../../Unicode/unicode.cm + ../../Util/util.cm + +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif + diff -uNr fxp-2.0.orig/src/Parser/Dfa/dfa.cm fxp-2.0/src/Parser/Dfa/dfa.cm --- fxp-2.0.orig/src/Parser/Dfa/dfa.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Dfa/dfa.cm Thu Nov 1 08:59:00 2007 @@ -0,0 +1,31 @@ +Group + structure DfaData + + signature DfaString + structure DfaString + signature Dfa + functor Dfa + signature DfaOptions + functor DfaOptions +is + dfa.sml + dfaData.sml + dfaError.sml + dfaOptions.sml + dfaPassOne.sml + dfaPassThree.sml + +#if (SMLNJ_MINOR_VERSION > 40) + dfaPassTwo.sml + dfaString.sml + dfaUtil.sml + $/basis.cm +#else + dfaPassTwo.orig.sml + dfaString.orig.sml + dfaUtil.orig.sml +#endif + + ../../Util/util.cm + ../../Util/SymDict/symdict.cm + diff -uNr fxp-2.0.orig/src/Parser/Dfa/dfaPassTwo.orig.sml fxp-2.0/src/Parser/Dfa/dfaPassTwo.orig.sml --- fxp-2.0.orig/src/Parser/Dfa/dfaPassTwo.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Dfa/dfaPassTwo.orig.sml Thu Nov 1 08:59:00 2007 @@ -0,0 +1,77 @@ + + + + + +(*--------------------------------------------------------------------------*) +(* Structure: DfaPassTwo *) +(* *) +(* Depends on: *) +(* DfaData *) +(* DfaUtil *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* passTwo : ConflictFollow *) +(*--------------------------------------------------------------------------*) +signature DfaPassTwo = + sig + val passTwo: bool -> DfaBase.CM -> (DfaBase.Follow * bool) vector + end + +structure DfaPassTwo : DfaPassTwo = + struct + open DfaBase DfaUtil + + (*--------------------------------------------------------------------*) + (* Given a CM annotated with leaf numbers (states), Empty and First, *) + (* compute Follow and Fin foreach node, and generate the transition *) + (* row if node is a leaf. Follow and Fin are computed top-down: *) + (* *) + (* (Top-Level): *) + (* Follow e = {}, Fin e = true *) + (* *) + (* (e=e1?): *) + (* Follow e1 = Follow e, Fin e1 = Fin e *) + (* *) + (* (e=e1*, e=e1+) *) + (* Follow e1 = Follow e1 ++ First e1, Fin e1 = Fin e *) + (* *) + (* (e=e1|...|eN) = *) + (* Follow eI = Follow e, Fin eI = Fin e for i=0...n *) + (* *) + (* (e=e1,...,eN) = *) + (* Follow eN = Follow e, Fin eN = Fin e *) + (* Follow eI = First eI+1, if Empty eI+1 = false, ia1 forall (q1,a1) in F1, (q1,a1) in F1} *) + (* error, if exist (q1,a) in F1, (q2,a) in F2 *) + (* then raise ConflictFirst(a,q1,q2) *) + (*--------------------------------------------------------------------*) + fun passTwo nondet (cmi as (_,(n,mt,fst))) = + let + val table = Array.array(n+1,(nil,false)) + + val _ = Array.update(table,0,(fst,mt)) + + fun do_cm (ff as (flw,fin)) (cm,(q,mt,fst)) = + case cm + of ELEM a => Array.update(table,q,ff) + | OPT cmi => do_cm ff cmi + | REP cmi => do_cm (mergeFollow nondet (fst,flw),fin) cmi + | PLUS cmi => do_cm (mergeFollow nondet (fst,flw),fin) cmi + | ALT cmis => app (do_cm ff) cmis + | SEQ cmis => ignore (do_seq ff cmis) + and do_seq ff cmis = foldr + (fn (cmi as (_,(_,mt,fst)),ff as (flw,fin)) + => (do_cm ff cmi; + if mt then (mergeFollow nondet (fst,flw),fin) else (fst,false))) + ff cmis + + val _ = do_cm (nil,true) cmi + + in Array.extract (table,0,NONE) + end + end diff -uNr fxp-2.0.orig/src/Parser/Dfa/dfaPassTwo.sml fxp-2.0/src/Parser/Dfa/dfaPassTwo.sml --- fxp-2.0.orig/src/Parser/Dfa/dfaPassTwo.sml Sat Jun 26 02:42:52 2004 +++ fxp-2.0/src/Parser/Dfa/dfaPassTwo.sml Thu Nov 1 08:59:00 2007 @@ -72,6 +72,6 @@ val _ = do_cm (nil,true) cmi - in Array.extract (table,0,NONE) + in Array.vector table end end diff -uNr fxp-2.0.orig/src/Parser/Dfa/dfaData.sml fxp-2.0/src/Parser/Dfa/dfaData.sml --- fxp-2.0.orig/src/Parser/Dfa/dfaData.sml.orig Mon Nov 5 21:23:08 2007 +++ fxp-2.0/src/Parser/Dfa/dfaData.sml Mon Nov 5 23:24:09 2007 @@ -70,4 +70,4 @@ val emptyDfa : Dfa = Vector.fromList [(1,0,Vector.fromList nil,true)] end -structure DfaData = DfaBase : DfaData +structure DfaData : DfaData = DfaBase diff -uNr fxp-2.0.orig/src/Parser/Dfa/dfaString.orig.sml fxp-2.0/src/Parser/Dfa/dfaString.orig.sml --- fxp-2.0.orig/src/Parser/Dfa/dfaString.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Dfa/dfaString.orig.sml Thu Nov 1 08:59:00 2007 @@ -0,0 +1,78 @@ + + + + + +(*--------------------------------------------------------------------------*) +(* Structure: DfaString *) +(* *) +(* Notes: *) +(* This structure is needed for debugging of content models and tables. *) +(* *) +(* Depends on: *) +(* DfaData *) +(* UtilString *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* Table2String : none *) +(* ContentModel2String : none *) +(*--------------------------------------------------------------------------*) +signature DfaString = + sig + val ContentModel2String : (int -> string) -> DfaData.ContentModel -> string + val Dfa2String : (int -> string) -> DfaData.Dfa -> string + end + +structure DfaString : DfaString = + struct + open DfaBase UtilString + + fun State2String q = if q=dfaError then "Error" else Int2String q + + fun Info2String Elem2String (q,mt,fst) = String.concat + (State2String q::Bool2xString ("[empty]","") mt + ::map (fn (q,a) => " "^Elem2String a^"->"^State2String q) fst) + + fun ContentModel2String Elem2String cm = + case cm + of CM_ELEM i => Elem2String i + | CM_OPT cm => ContentModel2String Elem2String cm^"?" + | CM_REP cm => ContentModel2String Elem2String cm^"*" + | CM_PLUS cm => ContentModel2String Elem2String cm^"+" + | CM_ALT cms => List2xString ("(","|",")") (ContentModel2String Elem2String) cms + | CM_SEQ cms => List2xString ("(",",",")") (ContentModel2String Elem2String) cms + + fun CM2String Elem2String = + let fun cm2s indent cm = + case cm + of (ELEM a,info) => String.concat + [indent,Elem2String a," ",Info2String Elem2String info,"\n"] + | (OPT cm',info) => String.concat + [indent,"? ",Info2String Elem2String info,"\n",cm2s (indent^" ") cm'] + | (REP cm',info) => String.concat + [indent,"* ",Info2String Elem2String info,"\n",cm2s (indent^" ") cm'] + | (PLUS cm',info) => String.concat + [indent,"+ ",Info2String Elem2String info,"\n",cm2s (indent^" ") cm'] + | (ALT cms,info) => String.concat + (indent^"| "::Info2String Elem2String info::"\n" + ::map (cm2s (indent^" ")) cms) + | (SEQ cms,info) => String.concat + (indent^", "::Info2String Elem2String info::"\n" + ::map (cm2s (indent^" ")) cms) + in cm2s "" + end + + fun Row2String Elem2String (lo,hi,tab,fin) = + String.concat + (Vector.foldri + (fn (i,q,yet) => if q<0 then yet + else " "::Elem2String (i+lo)::"->"::State2String q::yet) + (if fin then [" [Final]"] else nil) + (tab,0,NONE)) + + fun Dfa2String Elem2String tab = + String.concat + (Vector.foldri + (fn (q,row,yet) => State2String q::":"::Row2String Elem2String row::yet) + nil (tab,0,NONE)) + end diff -uNr fxp-2.0.orig/src/Parser/Dfa/dfaString.sml fxp-2.0/src/Parser/Dfa/dfaString.sml --- fxp-2.0.orig/src/Parser/Dfa/dfaString.sml Sat Jun 26 02:42:52 2004 +++ fxp-2.0/src/Parser/Dfa/dfaString.sml Thu Nov 1 08:59:00 2007 @@ -68,11 +68,11 @@ (fn (i,q,yet) => if q<0 then yet else " "::Elem2String (i+lo)::"->"::State2String q::yet) (if fin then [" [Final]"] else nil) - (tab,0,NONE)) + tab) fun Dfa2String Elem2String tab = String.concat (Vector.foldri (fn (q,row,yet) => State2String q::":"::Row2String Elem2String row::yet) - nil (tab,0,NONE)) + nil tab) end diff -uNr fxp-2.0.orig/src/Parser/Dfa/dfaUtil.orig.sml fxp-2.0/src/Parser/Dfa/dfaUtil.orig.sml --- fxp-2.0.orig/src/Parser/Dfa/dfaUtil.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Dfa/dfaUtil.orig.sml Thu Nov 1 08:59:00 2007 @@ -0,0 +1,130 @@ + + + + + + +(*--------------------------------------------------------------------------*) +(* Structure: DfaUtil *) +(* *) +(* Depends on: *) +(* DfaData *) +(* UtilInt *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* boundsFollow : none *) +(* cmSymbols : none *) +(* makeRow : none *) +(* mergeFirst : ConflictFirst *) +(* mergeFollow : ConflictFollow *) +(*--------------------------------------------------------------------------*) +signature DfaUtil = + sig + val mergeFirst : bool -> DfaBase.First * DfaBase.First -> DfaBase.First + val mergeFollow : bool -> DfaBase.Follow * DfaBase.Follow -> DfaBase.Follow + val boundsFollow : DfaBase.Follow -> DfaBase.Sigma * DfaBase.Sigma + val cmSymbols : DfaBase.ContentModel -> DfaBase.Sigma list + val makeRow : DfaBase.Follow * bool -> DfaBase.Row + end + +structure DfaUtil : DfaUtil = + struct + open UtilInt DfaBase + + (*--------------------------------------------------------------------*) + (* merge two First sets, raise ConflictFirst at conflict: there may *) + (* not be two entries (q1,a) and (q2,a) in the same First set, if *) + (* nondet is false. *) + (*--------------------------------------------------------------------*) + fun mergeFirst nondet ll = + let + fun go_det (nil,l) = l + | go_det (l,nil) = l + | go_det (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) = + case Int.compare(a1,a2) + of LESS => x1::go_det(r1,l2) + | GREATER => x2::go_det(l1,r2) + | EQUAL => raise ConflictFirst(a1,q1,q2) + + fun go_nondet (nil,l) = l + | go_nondet (l,nil) = l + | go_nondet (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) = + case Int.compare(a1,a2) + of LESS => x1::go_nondet(r1,l2) + | GREATER => x2::go_nondet(l1,r2) + | EQUAL => case Int.compare(q1,q2) + of LESS => x1::go_nondet(r1,l2) + | GREATER => x2::go_nondet(l1,r2) + | EQUAL => go_nondet(l1,r2) + in + if nondet then go_nondet ll else go_det ll + end + + (*--------------------------------------------------------------------*) + (* merge two Follow sets, raise ConflictFollow at conflict. there may *) + (* not be two entries (q1,a) and (q2,a) with q1<>q2 in the same Follow*) + (* set, if nondet is false. Note that, e.g. for (a+)+, Follow(a) = *) + (* Follow(a+) U First(a+), so duplicate occurrences of the same (q,a) *) + (* are possible (as opposed to First). *) + (*--------------------------------------------------------------------*) + fun mergeFollow nondet ll = + let + fun go_det (nil,l) = l + | go_det (l,nil) = l + | go_det (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) = + case Int.compare(a1,a2) + of LESS => x1::go_det(r1,l2) + | GREATER => x2::go_det(l1,r2) + | EQUAL => if q1=q2 then go_det(l1,r2) + else raise ConflictFollow(a1,q1,q2) + + fun go_nondet (nil,l) = l + | go_nondet (l,nil) = l + | go_nondet (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) = + case Int.compare(a1,a2) + of LESS => x1::go_nondet(r1,l2) + | GREATER => x2::go_nondet(l1,r2) + | EQUAL => case Int.compare(q1,q2) + of LESS => x1::go_nondet(r1,l2) + | GREATER => x2::go_nondet(l1,r2) + | EQUAL => go_nondet(l1,r2) + in + if nondet then go_nondet ll else go_det ll + end + + (*--------------------------------------------------------------------*) + (* what are the least and largest symbol occurring in a Follow set? *) + (*--------------------------------------------------------------------*) + fun boundsFollow (nil:Follow) = (1,0) + | boundsFollow [(q,a)] = (a,a) + | boundsFollow ((q,a)::xs) = (a,#2(List.last xs)) + + (*--------------------------------------------------------------------*) + (* return the list of all symbols occurring in a content model. *) + (*--------------------------------------------------------------------*) + fun cmSymbols cm = + let + fun do_cm(cm,yet) = + case cm + of CM_ELEM a => insertInt(a,yet) + | CM_OPT cm => do_cm(cm,yet) + | CM_REP cm => do_cm(cm,yet) + | CM_PLUS cm => do_cm(cm,yet) + | CM_ALT cms => foldr do_cm yet cms + | CM_SEQ cms => foldr do_cm yet cms + in do_cm(cm,nil) + end + + (*--------------------------------------------------------------------*) + (* given the follow set and the final flag, make a row in the dfa. *) + (*--------------------------------------------------------------------*) + fun makeRow (flw,fin) = + let + val (lo,hi) = boundsFollow flw + val tab = Array.array(hi-lo+1,dfaError) + val _ = app (fn (q,a) => Array.update (tab,a-lo,q)) flw + in + (lo,hi,Array.extract (tab,0,NONE),fin) + end + + end diff -uNr fxp-2.0.orig/src/Parser/Dfa/dfaUtil.sml fxp-2.0/src/Parser/Dfa/dfaUtil.sml --- fxp-2.0.orig/src/Parser/Dfa/dfaUtil.sml Sat Jun 26 02:42:52 2004 +++ fxp-2.0/src/Parser/Dfa/dfaUtil.sml Thu Nov 1 08:59:00 2007 @@ -124,7 +124,7 @@ val tab = Array.array(hi-lo+1,dfaError) val _ = app (fn (q,a) => Array.update (tab,a-lo,q)) flw in - (lo,hi,Array.extract (tab,0,NONE),fin) + (lo,hi,Array.vector tab,fin) end end diff -uNr fxp-2.0.orig/src/Parser/Dtd/dtd.cm fxp-2.0/src/Parser/Dtd/dtd.cm --- fxp-2.0.orig/src/Parser/Dtd/dtd.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Dtd/dtd.cm Thu Nov 1 08:59:01 2007 @@ -0,0 +1,20 @@ +Group + signature DtdManager + functor DtdManager +is +#if (SMLNJ_MINOR_VERSION > 40) + dtdAttributes.sml + $/basis.cm +#else + dtdAttributes.orig.sml +#endif + dtdDeclare.sml + dtdManager.sml + + ../entities.cm + ../Params/params.cm + ../Base/base.cm + ../Error/error.cm + ../../Unicode/unicode.cm + ../../Util/util.cm + diff -uNr fxp-2.0.orig/src/Parser/Dtd/dtdAttributes.orig.sml fxp-2.0/src/Parser/Dtd/dtdAttributes.orig.sml --- fxp-2.0.orig/src/Parser/Dtd/dtdAttributes.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Dtd/dtdAttributes.orig.sml Thu Nov 1 08:59:01 2007 @@ -0,0 +1,538 @@ +(*--------------------------------------------------------------------------*) +(* Structure: DtdAttributes *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* checkAttValue : AttValue InternalError *) +(* checkDefinedIds : none *) +(* genMissingAtts : none *) +(* makeAttValue : AttValue InternalError *) +(*--------------------------------------------------------------------------*) +functor DtdAttributes (structure Dtd : Dtd + structure Entities : Entities + structure ParserOptions : ParserOptions) = + struct + structure DtdDeclare = DtdDeclare (structure Dtd = Dtd + structure Entities = Entities + structure ParserOptions = ParserOptions) + open + UniChar UniClasses UtilList + Base Dtd DtdDeclare Errors Entities HookData ParserOptions + + val THIS_MODULE = "DtdAttributes" + + exception AttValue of AppData + + (*--------------------------------------------------------------------*) + (* this is the list of language codes in ISO 639. *) + (*--------------------------------------------------------------------*) + val iso639codes = + Vector.fromList + ["AA","AB","AF","AM","AR","AS","AY","AZ", + "BA","BE","BG","BH","BI","BN","BO","BR", + "CA","CO","CS","CY", + "DA","DE","DZ", + "EL","EN","EO","ES","ET","EU", + "FA","FI","FJ","FO","FR","FY", + "GA","GD","GL","GN","GU", + "HA","HE","HI","HR","HU","HY", + "IA","ID","IE","IK","IN","IS","IT","IU","IW", + "JA","JI","JW", + "KA","KK","KL","KM","KN","KO","KS","KU","KY", + "LA","LN","LO","LT","LV", + "MG","MI","MK","ML","MN","MO","MR","MS","MT","MY", + "NA","NE","NL","NO", + "OC","OM","OR", + "PA","PL","PS","PT", + "QU", + "RM","RN","RO","RU","RW", + "SA","SD","SG","SH","SI","SK","SL","SM","SN","SO","SQ","SR","SS","ST","SU","SV","SW", + "TA","TE","TG","TH","TI","TK","TL","TN","TO","TR","TS","TT","TW", + "UG","UK","UR","UZ", + "VI","VO", + "WO", + "XH", + "YI","YO", + "ZA","ZH","ZU"] + + (*--------------------------------------------------------------------*) + (* a two-dimensional field [0..25][0..25] of booleans for ISO 639. *) + (*--------------------------------------------------------------------*) + val iso639field = + let + val arr = Array.tabulate(26,fn _ => Array.array(26,false)) + val _ = Vector.map + (fn s => Array.update(Array.sub(arr,ord(String.sub(s,0))-65), + ord(String.sub(s,1))-65, + true)) + iso639codes + in Vector.tabulate(26,fn i => Array.extract (Array.sub(arr,i),0,NONE)) + end + + (*--------------------------------------------------------------------*) + (* for a letter, compute ord(toUpper c)-ord(#"A"), for subscripting. *) + (*--------------------------------------------------------------------*) + val toUpperMask = Chars.notb(0wx20) + fun cIndex c = Chars.toInt(Chars.andb(c,toUpperMask)-0wx41) + + (*--------------------------------------------------------------------*) + (* are these two letters an ISO 639 code? *) + (*--------------------------------------------------------------------*) + fun isIso639 (c1,c2) = + if !O_CHECK_ISO639 then + Vector.sub(Vector.sub(iso639field,cIndex c1),cIndex c2) + handle Subscript => false + else isAsciiLetter c1 andalso isAsciiLetter c2 + + (*--------------------------------------------------------------------*) + (* does this match Subcode ('-' Subcode)* ? *) + (* is this a sequence of ('-' Subcode) ? *) + (* Iana codes and user codes also end on ([a-z] | [A-Z])+ *) + (*--------------------------------------------------------------------*) + fun isSubcode' nil = false + | isSubcode' (c::cs) = + let fun doit nil = true + | doit (c::cs) = if c=0wx2D then isSubcode' cs + else isAsciiLetter c andalso doit cs + in isAsciiLetter c andalso doit cs + end + fun isSubcode nil = true + | isSubcode (c::cs) = c=0wx2D andalso isSubcode' cs + val isIanaUser = isSubcode' + + (*--------------------------------------------------------------------*) + (* Check whether a "xml:lang" attribute matches the LanguageID *) + (* production. 2.12: *) + (* *) + (* [33] LanguageID ::= Langcode ('-' Subcode)* *) + (* [34] Langcode ::= ISO639Code | IanaCode | UserCode *) + (* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) *) + (* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ *) + (* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ *) + (* [38] Subcode ::= ([a-z] | [A-Z])+ *) + (* *) + (* print an error and raise AttValue if the "xml:lang" attribute does *) + (* not have a valid value. *) + (*--------------------------------------------------------------------*) + fun checkAttSpec (a,q) (aidx,cs) = + if !O_CHECK_LANGID andalso aidx=xmlLangIdx + then let val valid = case cs + of c::0wx2D::cs' => (c=0wx49 orelse + c=0wx69 orelse + c=0wx58 orelse + c=0wx78) andalso isIanaUser cs' + | c1::c2::cs' => isIso639 (c1,c2) andalso isSubcode cs' + | _ => false + in + if valid then a + else raise AttValue(hookError(a,(getPos q,ERR_ATT_IS_NOT(cs,IT_LANG_ID)))) + end + else a + + (*--------------------------------------------------------------------*) + (* Normalize an attribute value of type other than CDATA, and split *) + (* it into tokens at space characters. Cf. 3.3.3: *) + (* *) + (* ... If the declared value is not CDATA, then the XML processor *) + (* must further process the normalized attribute value by dis- *) + (* carding any leading and trailing space (#x20) characters, and by *) + (* replacing sequences of space (#x20) characters by a single space *) + (* (#x20) character. *) + (* *) + (* replacement of references is already done when parsing the literal,*) + (* thus we need only do whitespace normalization. we don't need to *) + (* take care of the 3rd rule since replacement of sequences of #x20 *) + (* and then splitting subsumes its effect. *) + (* *) + (* return the list of tokens as character lists and the normalized *) + (* value as a char vector. *) + (*--------------------------------------------------------------------*) + fun splitAttValue av = + let + fun doOne nil = (nil,nil,nil) + | doOne (c::cs) = if c=0wx20 then let val (toks,ys) = doAll true cs + in (nil,toks,ys) + end + else let val (tok,toks,ys) = doOne cs + in ((c::tok),toks,c::ys) + end + and doAll addS nil = (nil,nil) + | doAll addS (c::cs) = if c=0wx20 then doAll addS cs + else let val (tok,toks,ys) = doOne cs + in ((c::tok)::toks, + if addS then 0wx20::c::ys else c::ys) + end + + val (tokens,normed) = doAll false av + in (Data2Vector normed,tokens) + end + (*--------------------------------------------------------------------*) + (* normalize an attribute value other than CDATA according to 3.3.3. *) + (* *) + (* return the normalized att value as a Vector. *) + (*--------------------------------------------------------------------*) + fun normAttValue av = + let fun doOne nil = nil + | doOne (c::cs) = if c=0wx20 then doAll true cs + else c::doOne cs + and doAll addS nil = nil + | doAll addS (c::cs) = if c=0wx20 then doAll addS cs + else let val ys = doOne cs + in if addS then 0wx20::c::ys else c::ys + end + val normed = doAll false av + in Data2Vector normed + end + + (*--------------------------------------------------------------------*) + (* Check whether a sequence of chars forms a name (token). *) + (*--------------------------------------------------------------------*) + fun isNmToken cs = List.all isName cs + fun isaName nil = false + | isaName (c::cs) = isNms c andalso List.all isName cs + + (*--------------------------------------------------------------------*) + (* Check whether a list of tokens is a single what fulfilling isWhat. *) + (* print an error and raise AttValue if it is not. *) + (*--------------------------------------------------------------------*) + fun checkOne (isWhat,what,detail) (a,q) toks = + case toks + of nil => raise AttValue (hookError(a,(getPos q,ERR_EXACTLY_ONE detail))) + | [one] => if isWhat one then one + else raise AttValue(hookError(a,(getPos q,ERR_ATT_IS_NOT(one,what)))) + | more => raise AttValue(hookError(a,(getPos q,ERR_AT_MOST_ONE detail))) + (*--------------------------------------------------------------------*) + (* Check whether a list of tokens is non-empty and all elements ful- *) + (* fil isWhat. *) + (* print an error and raise AttValue if not. *) + (*--------------------------------------------------------------------*) + fun checkList (isWhat,what,detail) (a,q) toks = + case toks + of nil => raise AttValue (hookError(a,(getPos q,ERR_AT_LEAST_ONE detail))) + | _ => app (fn one => if isWhat one then () + else let val err = ERR_ATT_IS_NOT(one,what) + in raise AttValue(hookError(a,(getPos q,err))) + end) toks + (*--------------------------------------------------------------------*) + (* Convert a list of tokens into an ID att value. 3.3.1: *) + (* *) + (* Validity Constraint: ID *) + (* Values of type ID must match the Name production. *) + (* *) + (* Validity Constraint: ID *) + (* ... A name must not appear more than once in an XML document as *) + (* a value of this type; i.e., ID values must uniquely identify the *) + (* elements which bear them. *) + (* *) + (* mark the value as used, print an error and raise AttValue if it *) + (* was already used. *) + (* print an error and raise AttValue if it is not a name. *) + (*--------------------------------------------------------------------*) + fun takeId (dtd,inDtd) (a,q) toks = + let val one = checkOne (isaName,IT_NAME,IT_ID_NAME) (a,q) toks + val idx = Id2Index dtd one + val _ = if inDtd then () + else let val (decl,refs) = getId dtd idx + in if decl then let val err = ERR_REPEATED_ID one + in raise AttValue (hookError(a,(getPos q,err))) + end + else setId dtd (idx,(true,refs)) + end + in (SOME(AV_ID idx),a) + end + + (*--------------------------------------------------------------------*) + (* Convert a list of tokens into an IDREF/IDREFS att value. 3.3.1: *) + (* *) + (* Validity Constraint: IDREF *) + (* Values of type IDREF must match the Name production. *) + (* *) + (* print an error an raise AttValue if it is not a (list of) name(s). *) + (*--------------------------------------------------------------------*) + fun setIdRef (dtd,q) idx = + let val (decl,refs) = getId dtd idx + in setId dtd (idx,(decl,getPos q::refs)) + end + fun takeIdref (dtd,_) (a,q) toks = + let val one = checkOne (isaName,IT_NAME,IT_ID_NAME) (a,q) toks + val idx=Id2Index dtd one + val _ = setIdRef (dtd,q) idx + in (SOME(AV_IDREF idx),a) + end + fun takeIdrefs (dtd,_) (a,q) toks = + let val _ = checkList (isaName,IT_NAME,IT_ID_NAME) (a,q) toks + val idxs = map (Id2Index dtd) toks + val _ = app (setIdRef (dtd,q)) idxs + in (SOME(AV_IDREFS idxs),a) + end + + (*--------------------------------------------------------------------*) + (* Convert a list of tokens into an ENTITY/IES att value. 3.3.1: *) + (* *) + (* Validity Constraint: Entity Name *) + (* Values of type ENTITY must match the Name production... *) + (* must match the name of an unparsed entity declared in the DTD. *) + (* *) + (* print an error and raise AttValue if a token is not a name. *) + (* print an error and raise AttValue if an entity is undeclared or a *) + (* parsed entity. *) + (*--------------------------------------------------------------------*) + fun checkEntity (dtd,inDtd) (a,q) name = + let val idx = GenEnt2Index dtd name + val (ent,_) = getGenEnt dtd idx + val _ = if inDtd then () + else case ent + of GE_UNPARSED _ => () + | GE_NULL => let val err = ERR_UNDECLARED(IT_GEN_ENT,name,LOC_NONE) + in raise AttValue (hookError(a,(getPos q,err))) + end + | _ => let val err = ERR_MUST_BE_UNPARSED(name,LOC_NONE) + in raise AttValue (hookError(a,(getPos q,err))) + end + in idx + end + fun takeEntity (dtd,inDtd) (aq as (a,_)) toks = + let val one = checkOne (isaName,IT_NAME,IT_ENT_NAME) aq toks + val idx = checkEntity (dtd,inDtd) aq one + in (SOME(AV_ENTITY idx),a) + end + fun takeEntities (dtd,inDtd) (aq as (a,_)) toks = + let val _ = checkList (isaName,IT_NAME,IT_ENT_NAME) aq toks + val idxs = map (checkEntity (dtd,inDtd) aq) toks + in (SOME(AV_ENTITIES idxs),a) + end + + (*--------------------------------------------------------------------*) + (* Convert a list of tokens into a NOTATION att value. 3.3.1: *) + (* *) + (* Validity Constraint: Notation Attributes *) + (* Values of this type must match one of the notation names *) + (* included in the declaration. *) + (* *) + (* print an error and raise AttValue if it is not a single name. *) + (* print an error and raise AttValue if the notation's index is not *) + (* in the list given as 1st arg. *) + (*--------------------------------------------------------------------*) + fun takeNotation is (dtd,inDtd) (aq as (a,q)) toks = + let val one = checkOne (isaName,IT_NAME,IT_NOT_NAME) aq toks + val idx = AttNot2Index dtd one + val _ = if member idx is then () + else let val nots = map (Index2AttNot dtd) is + val err = ERR_MUST_BE_AMONG(IT_NOT_NAME,one,nots) + in raise AttValue (hookError(a,(getPos q,err))) + end + in (SOME(AV_NOTATION(is,idx)),a) + end + + (*--------------------------------------------------------------------*) + (* Convert a list of tokens into an enumerated att value. 3.3.1: *) + (* *) + (* Validity Constraint: Enumeration *) + (* Values of this type must match one of the Nmtoken tokens in *) + (* the declaration. *) + (* *) + (* print an error and raise AttValue if it is not a single name token.*) + (* print an error and raise AttValue if the token's index is not *) + (* in the list given as 1st arg. *) + (*--------------------------------------------------------------------*) + fun takeGroup is (dtd,_) (aq as (a,q)) toks = + let val one = checkOne (isNmToken,IT_NMTOKEN,IT_NMTOKEN) aq toks + val idx = AttNot2Index dtd one + val _ = if member idx is then () + else let val toks = map (Index2AttNot dtd) is + val err = ERR_MUST_BE_AMONG(IT_NMTOKEN,one,toks) + in raise AttValue (hookError(a,(getPos q,err))) + end + in (SOME(AV_GROUP(is,idx)),a) + end + + (*--------------------------------------------------------------------*) + (* Given an attribute type and a list of characters, construct the *) + (* corresponding AttValue. *) + (* *) + (* print an error (and possibly raise AttValue) if the attribute *) + (* is ill-formed. *) + (*--------------------------------------------------------------------*) + fun makeAttValue dtd (a,q) (aidx,attType,ext,inDtd,cs) = + if attType=AT_CDATA + then let val cv = Data2Vector cs + in if !O_VALIDATE andalso hasDtd dtd + then (cv,(SOME(AV_CDATA cv),checkAttSpec (a,q) (aidx,cs))) + else (cv,(NONE,a)) + end + else + if !O_VALIDATE andalso hasDtd dtd then + let + val a1 = checkAttSpec (a,q) (aidx,cs) + val (cv,toks) = splitAttValue cs + val a2 = + if ext andalso standsAlone dtd + then let val cdata = Data2Vector cs + in if cdata=cv then a1 + else let val err = ERR_STANDALONE_NORM(Index2AttNot dtd aidx) + val _ = setStandAlone dtd (not (!O_ERROR_MINIMIZE)) + in hookError(a1,(getPos q,err)) + end + end + else a1 + in case attType + of AT_NMTOKEN => (cv,(SOME(AV_NMTOKEN(checkOne(isNmToken,IT_NMTOKEN, + IT_NMTOKEN) (a2,q) toks)),a2)) + | AT_NMTOKENS => (cv,(SOME(AV_NMTOKENS toks),a2)) before + checkList(isNmToken,IT_NMTOKEN,IT_NMTOKEN) (a2,q) toks + | AT_ID => (cv,takeId (dtd,inDtd) (a2,q) toks) + | AT_IDREF => (cv,takeIdref (dtd,inDtd) (a2,q) toks) + | AT_IDREFS => (cv,takeIdrefs (dtd,inDtd) (a2,q) toks) + | AT_ENTITY => (cv,takeEntity (dtd,inDtd) (a2,q) toks) + | AT_ENTITIES => (cv,takeEntities (dtd,inDtd) (a2,q) toks) + | AT_GROUP is => (cv,takeGroup is (dtd,inDtd) (a2,q) toks) + | AT_NOTATION is => (cv,takeNotation is (dtd,inDtd) (a2,q) toks) + | AT_CDATA => raise InternalError(THIS_MODULE,"makeAttValue", + "AT_CDATA in the innermost case") + end + else (normAttValue cs,(NONE,a)) + + (*--------------------------------------------------------------------*) + (* given an attribute value literal and the attribute type, generate *) + (* the AttValue, and check whether it complies with its default value.*) + (* If yes, make an AttPresent value out of it. *) + (* See 3.3.2: *) + (* *) + (* Validity Constraint: Fixed Attribute Default *) + (* If an attribute has a default value declared with the #FIXED *) + (* keyword, instances of that attribute must match the default *) + (* value. *) + (* *) + (* print an error and raise AttValue if the attribute value doesn't *) + (* comply. *) + (* *) + (* return the value as a AttPresent value. *) + (*--------------------------------------------------------------------*) + fun checkAttValue dtd (a,q) ((aidx,attType,defVal,ext),literal,cs) = + let val (cv,(av,a1)) = makeAttValue dtd (a,q) (aidx,attType,ext,false,cs) + in if !O_VALIDATE andalso hasDtd dtd then + case defVal + of AD_FIXED((def,cv',_),_) => + if cv=cv' then (AP_PRESENT(literal,cv,av),a1) + else raise AttValue + (hookError(a1,(getPos q,ERR_FIXED_VALUE(Index2AttNot dtd aidx,cv,cv')))) + | _ => (AP_PRESENT(literal,cv,av),a1) + else (AP_PRESENT(literal,cv,av),a1) + end + + (*--------------------------------------------------------------------*) + (* check a defaulted attribute value for validity. *) + (* *) + (* since the lexical constraints are checked when the default is *) + (* declared we only need to check whether notations are declared and *) + (* entities are declared and unparsed. An ID attribute cannot be *) + (* defaulted, so no need to check for duplicate ID attributes. *) + (*--------------------------------------------------------------------*) + fun checkDefaultValue dtd (a,q,pos) av = + let + fun checkEntity (idx,a) = + let val (ent,_) = getGenEnt dtd idx + in case ent + of GE_UNPARSED _ => a + | GE_NULL => hookError(a,(getPos q,ERR_UNDECLARED + (IT_GEN_ENT,Index2GenEnt dtd idx, + LOC_ATT_DEFAULT pos))) + | _ => hookError(a,(getPos q,ERR_MUST_BE_UNPARSED + (Index2GenEnt dtd idx,LOC_ATT_DEFAULT pos))) + end + + fun checkNotation (idx,a) = + if hasNotation dtd idx then a + else hookError(a,(getPos q,ERR_UNDECLARED + (IT_NOTATION,Index2AttNot dtd idx,LOC_ATT_DEFAULT pos))) + in + case av + of SOME(AV_ENTITY i) => checkEntity (i,a) + | SOME(AV_ENTITIES is) => foldl checkEntity a is + | SOME(AV_NOTATION(_,i)) => checkNotation(i,a) + | _ => a + end + + (*--------------------------------------------------------------------*) + (* Generate the attributes not specified in a start-tag, the defs of *) + (* these atts and the specified atts given as argument. 3.3.2: *) + (* *) + (* If the declaration is neither #REQUIRED nor #IMPLIED, then the *) + (* AttValue value contains the declared default value; ... If a *) + (* default value is declared, when an XML processor encounters an *) + (* omitted attribute, it is to behave as though the attribute were *) + (* present with the declared default value. *) + (* *) + (* Validity Constraint: Required Attribute *) + (* If the default declaration is the keyword #REQUIRED, then the *) + (* attribute must be specified for all elements of the type in the *) + (* attribute-list declaration. *) + (* *) + (* print an error if a required attribute was omitted. *) + (* *) + (* return the AttSpecList of all attributes for this tag. *) + (*--------------------------------------------------------------------*) + fun genMissingAtts dtd (a,q) (defs,specd) = + let + fun default a (idx,(v as (_,_,av),(pos,checked)),ext) = + let val a1 = if ext andalso !O_VALIDATE andalso standsAlone dtd + then let val err = ERR_STANDALONE_DEF(Index2AttNot dtd idx) + val _ = setStandAlone dtd (not (!O_ERROR_MINIMIZE)) + in hookError(a,(getPos q,err)) + end + else a + val a2 = if !O_VALIDATE andalso not (!checked andalso !O_ERROR_MINIMIZE) + then checkDefaultValue dtd (a1,q,pos) av before checked := true + else a1 + in (AP_DEFAULT v,a1) + end + fun doit a nil = (specd,a) + | doit a ((idx,_,dv,ext)::rest) = + let val (value,a1) = + case dv + of AD_DEFAULT v => default a (idx,v,ext) + | AD_FIXED v => default a (idx,v,ext) + | AD_IMPLIED => (AP_IMPLIED,a) + | AD_REQUIRED => + let val a1 = if not (!O_VALIDATE) then a + else hookError(a,(getPos q, + ERR_MISSING_ATT(Index2AttNot dtd idx))) + in (AP_MISSING,a1) + end + val (other,a2) = doit a1 rest + in ((idx,value,NONE)::other,a2) + end + in doit a defs + end + + (*--------------------------------------------------------------------*) + (* process an undeclared attribute in a start-tag. *) + (* At option, an error message is generated only once for the same *) + (* attribute and element. *) + (* *) + (* possibly print an error. *) + (* *) + (* return nothing. *) + (*--------------------------------------------------------------------*) + fun handleUndeclAtt dtd (a,q) (aidx,att,eidx,elem) = + if !O_ERROR_MINIMIZE then + let val {decl,atts,errAtts} = getElement dtd eidx + in if member aidx errAtts then a + else let val a1 = if !O_VALIDATE andalso hasDtd dtd + then let val err = ERR_UNDECL_ATT(att,elem) + in hookError(a,(getPos q,err)) + end + else a + val a2 = checkAttName (a1,q) att + val _ = setElement dtd (eidx,{decl = decl, + atts = atts, + errAtts = aidx::errAtts}) + in a2 + end + end + else let val a1 = if !O_VALIDATE andalso hasDtd dtd + then hookError(a,(getPos q,ERR_UNDECL_ATT(att,elem))) + else a + in checkAttName (a1,q) att + end + + end diff -uNr fxp-2.0.orig/src/Parser/Dtd/dtdAttributes.sml fxp-2.0/src/Parser/Dtd/dtdAttributes.sml --- fxp-2.0.orig/src/Parser/Dtd/dtdAttributes.sml Sat Jun 26 02:42:53 2004 +++ fxp-2.0/src/Parser/Dtd/dtdAttributes.sml Thu Nov 1 08:59:01 2007 @@ -65,7 +65,7 @@ ord(String.sub(s,1))-65, true)) iso639codes - in Vector.tabulate(26,fn i => Array.extract (Array.sub(arr,i),0,NONE)) + in Vector.tabulate(26,fn i => Array.vector (Array.sub(arr,i))) end (*--------------------------------------------------------------------*) diff -uNr fxp-2.0.orig/src/Parser/Error/error.cm fxp-2.0/src/Parser/Error/error.cm --- fxp-2.0.orig/src/Parser/Error/error.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Error/error.cm Thu Nov 1 08:59:02 2007 @@ -0,0 +1,17 @@ +Group + structure Errors +is + errorData.sml + errorMessage.sml + errorString.sml + errorUtil.sml + errors.sml + expected.sml + + ../../Util/util.cm + ../../Unicode/unicode.cm + +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif + diff -uNr fxp-2.0.orig/src/Parser/Params/dtd.orig.sml fxp-2.0/src/Parser/Params/dtd.orig.sml --- fxp-2.0.orig/src/Parser/Params/dtd.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Params/dtd.orig.sml Thu Nov 1 08:59:04 2007 @@ -0,0 +1,339 @@ +(*--------------------------------------------------------------------------*) +(* Structure: Dtd *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* AttNot2Index : none *) +(* Element2Index : none *) +(* GenEnt2Index : none *) +(* Id2Index : none *) +(* Index2AttNot : NoSuchIndex *) +(* Index2Element : NoSuchIndex *) +(* Index2GenEnt : NoSuchIndex *) +(* Index2Id : NoSuchIndex *) +(* Index2ParEnt : NoSuchIndex *) +(* ParEnt2Index : none *) +(* entitiesWellformed : none *) +(* getElement : NoSuchIndex *) +(* getGenEnt : NoSuchIndex *) +(* getId : NoSuchIndex *) +(* getNotation : NoSuchIndex *) +(* getParEnt : NoSuchIndex *) +(* hasNotation : NoSuchIndex *) +(* initDtdTables : none *) +(* maxUsedElem : none *) +(* maxUsedId : none *) +(* printAttNotTable : none *) +(* printIdTable : none *) +(* printParEntTable : none *) +(* printxElementTable : none *) +(* printxGenEntTable : none *) +(* setElement : NoSuchIndex *) +(* setGenEnt : NoSuchIndex *) +(* setId : NoSuchIndex *) +(* setNotation : NoSuchIndex *) +(* setParEnt : NoSuchIndex *) +(*--------------------------------------------------------------------------*) +signature Dtd = + sig + type Dtd + + val hasDtd : Dtd -> bool + val hasExternal : Dtd -> bool + val standsAlone : Dtd -> bool + + val setHasDtd : Dtd -> unit + val setExternal : Dtd -> unit + val setStandAlone : Dtd -> bool -> unit + + val entitiesWellformed : Dtd -> bool + + val validPredef : int -> UniChar.Vector + val isRedefined : Dtd -> int -> bool + val setRedefined : Dtd -> int -> unit + val notRedefined : Dtd -> UniChar.Data list + + val AttNot2Index : Dtd -> UniChar.Data -> int + val Element2Index : Dtd -> UniChar.Data -> int + val Id2Index : Dtd -> UniChar.Data -> int + val GenEnt2Index : Dtd -> UniChar.Data -> int + val ParEnt2Index : Dtd -> UniChar.Data -> int + + + val hasAttNot : Dtd -> UniChar.Data -> int option + val hasElement : Dtd -> UniChar.Data -> int option + val hasId : Dtd -> UniChar.Data -> int option + val hasGenEnt : Dtd -> UniChar.Data -> int option + val hasParEnt : Dtd -> UniChar.Data -> int option + + val Index2Element : Dtd -> int -> UniChar.Data + val Index2Id : Dtd -> int -> UniChar.Data + val Index2GenEnt : Dtd -> int -> UniChar.Data + val Index2AttNot : Dtd -> int -> UniChar.Data + val Index2ParEnt : Dtd -> int -> UniChar.Data + + val getId : Dtd -> int -> Base.IdInfo + val getElement : Dtd -> int -> Base.ElemInfo + val getGenEnt : Dtd -> int -> Base.GenEntInfo + val getNotation : Dtd -> int -> Base.NotationInfo + val getParEnt : Dtd -> int -> Base.ParEntInfo + + val hasNotation : Dtd -> int -> bool + + val setId : Dtd -> int * Base.IdInfo -> unit + val setElement : Dtd -> int * Base.ElemInfo -> unit + val setGenEnt : Dtd -> int * Base.GenEntInfo -> unit + val setNotation : Dtd -> int * Base.ExternalId -> unit + val setParEnt : Dtd -> int * Base.ParEntInfo -> unit + + val maxUsedId : Dtd -> int + val maxUsedElem : Dtd -> int + val maxUsedGen : Dtd -> int + + val initDtdTables : unit -> Dtd + val printDtdTables : Dtd -> unit + + val printAttNotTable : Dtd -> unit + val printIdTable : Dtd -> unit + val printElementTable : Dtd -> unit + val printGenEntTable : Dtd -> unit + val printParEntTable : Dtd -> unit + + val defaultIdx : int + val preserveIdx : int + val xmlLangIdx : int + val xmlSpaceIdx : int + end + +structure Dtd : Dtd = + struct + open + UtilInt + Base UniChar + DataDict DataSymTab + + val O_TS_ELEM = ref 6 (* Initial size of element table *) + val O_TS_GEN_ENT = ref 6 (* Initial size of general entity table *) + val O_TS_ID = ref 6 (* Initial size of id attribute table *) + val O_TS_ATT_NOT = ref 6 (* Initial size of notation table *) + val O_TS_PAR_ENT = ref 6 (* Initial size of parameter entity table *) + + (*--------------------------------------------------------------------*) + (* this is how the predefined entities must be declared. *) + (*--------------------------------------------------------------------*) + val predefined = Vector.fromList + (map (fn (x,y,z) => (String2Data x,String2Vector y,String2Vector z)) + [("","",""), + ("amp" ,"'&'","&"), + ("lt" ,"'<'","<"), + ("gt" ,"'>'",">"), + ("apos","\"'\"" ,"'" ), + ("quot","'\"'" ,"\"" )]) + fun validPredef i = #3(Vector.sub(predefined,i)) + + (*--------------------------------------------------------------------*) + (* this type holds all information relevent to the DTD. *) + (*--------------------------------------------------------------------*) + type Dtd = {hasDtdFlag : bool ref, + standAloneFlag : bool ref, + externalFlag : bool ref, + elDict : ElemInfo DataDict.Dict, + genDict : GenEntInfo DataDict.Dict, + idDict : IdInfo DataDict.Dict, + notDict : NotationInfo DataDict.Dict, + parDict : ParEntInfo DataDict.Dict, + preRedef : bool array + } + + fun newDtd() = {hasDtdFlag = ref false, + standAloneFlag = ref false, + externalFlag = ref false, + elDict = nullDict ("element",nullElemInfo), + idDict = nullDict ("ID name",nullIdInfo), + genDict = nullDict ("general entity",(GE_NULL,false)), + notDict = nullDict ("attribute and notation",NONE:NotationInfo), + parDict = nullDict ("parameter entity",(PE_NULL,false)), + preRedef = Array.array(6,false) + } : Dtd + + val default = String2Data "default" + val preserve = String2Data "preserve" + val xmlLang = String2Data "xml:lang" + val xmlSpace = String2Data "xml:space" + + (*--------------------------------------------------------------------*) + (* standalone status, existance of a DTD and of external declarations *) + (* externalFlag is true if there is an external subset or a (not nece-*) + (* ssarily external) parameter entity reference in the DTD. (cf. 4.1) *) + (*--------------------------------------------------------------------*) + fun standsAlone (dtd:Dtd) = !(#standAloneFlag dtd) + fun hasExternal (dtd:Dtd) = !(#externalFlag dtd) + fun hasDtd (dtd:Dtd) = !(#hasDtdFlag dtd) + + fun setHasDtd (dtd:Dtd) = #hasDtdFlag dtd := true + fun setExternal (dtd:Dtd) = #externalFlag dtd := true + fun setStandAlone (dtd:Dtd) x = #standAloneFlag dtd := x + + + (*--------------------------------------------------------------------*) + (* 4.1: *) + (* Well-Formedness Constraint: Entity Declared *) + (* In a document without any DTD, a document with only an internal *) + (* DTD subset which contains no parameter entity references, or a *) + (* document with "standalone='yes'", the Name given in the entity *) + (* reference must match that in an entity declaration ... Note that *) + (* if entities are declared in the external subset or in external *) + (* parameter entities, a non-validating processor is not obligated *) + (* to read and process their declarations; for such documents, the *) + (* rule that an entity must be declared is a well-formedness *) + (* constraint only if standalone='yes'. *) + (* *) + (* Thus a reference to an undeclared entity is a well-formedness *) + (* error if either #hasDtdFlag or #externalFlag is false, or if *) + (* #standaloneFlag is true *) + (*--------------------------------------------------------------------*) + (* bug fixed 080600: changed !hasDtdFlag to not(!hasDtdFlag) *) + (*--------------------------------------------------------------------*) + fun entitiesWellformed ({hasDtdFlag,standAloneFlag,externalFlag,...}:Dtd) = + not (!hasDtdFlag andalso !externalFlag) orelse !standAloneFlag + + fun initStandalone ({hasDtdFlag,standAloneFlag,externalFlag,...}:Dtd) = + (hasDtdFlag := false; standAloneFlag := false; externalFlag := false) + + (*--------------------------------------------------------------------*) + (* this array tells whether the predefined entities (index 1-5) have *) + (* been declared in the dtd. *) + (*--------------------------------------------------------------------*) + fun isRedefined (dtd:Dtd) i = Array.sub(#preRedef dtd,i) + fun setRedefined (dtd:Dtd) i = Array.update(#preRedef dtd,i,true) + fun notRedefined dtd = List.mapPartial + (fn i => if isRedefined dtd i then NONE else SOME(#1(Vector.sub(predefined,i)))) + [1,2,3,4,5] + + fun AttNot2Index (dtd:Dtd) name = getIndex(#notDict dtd,name) + fun Element2Index (dtd:Dtd) name = getIndex(#elDict dtd,name) + fun GenEnt2Index (dtd:Dtd) name = getIndex(#genDict dtd,name) + fun Id2Index (dtd:Dtd) name = getIndex(#idDict dtd,name) + fun ParEnt2Index (dtd:Dtd) name = getIndex(#parDict dtd,name) + + fun hasAttNot (dtd:Dtd) name = hasIndex(#notDict dtd,name) + fun hasElement (dtd:Dtd) name = hasIndex(#elDict dtd,name) + fun hasId (dtd:Dtd) name = hasIndex(#idDict dtd,name) + fun hasGenEnt (dtd:Dtd) name = hasIndex(#genDict dtd,name) + fun hasParEnt (dtd:Dtd) name = hasIndex(#parDict dtd,name) + + fun Index2AttNot (dtd:Dtd) idx = getKey(#notDict dtd,idx) + fun Index2Element (dtd:Dtd) idx = getKey(#elDict dtd,idx) + fun Index2GenEnt (dtd:Dtd) idx = getKey(#genDict dtd,idx) + fun Index2Id (dtd:Dtd) idx = getKey(#idDict dtd,idx) + fun Index2ParEnt (dtd:Dtd) idx = getKey(#parDict dtd,idx) + + fun getElement (dtd:Dtd) idx = getByIndex(#elDict dtd,idx) + fun getGenEnt (dtd:Dtd) idx = getByIndex(#genDict dtd,idx) + fun getId (dtd:Dtd) idx = getByIndex(#idDict dtd,idx) + fun getNotation (dtd:Dtd) idx = getByIndex(#notDict dtd,idx) + fun getParEnt (dtd:Dtd) idx = getByIndex(#parDict dtd,idx) + + fun hasNotation (dtd:Dtd) idx = isSome(getByIndex(#notDict dtd,idx)) + + fun setElement (dtd:Dtd) (idx,el) = setByIndex(#elDict dtd,idx,el) + fun setGenEnt (dtd:Dtd) (idx,ge) = setByIndex(#genDict dtd,idx,ge) + fun setId (dtd:Dtd) (idx,a) = setByIndex(#idDict dtd,idx,a) + fun setNotation (dtd:Dtd) (idx,nt) = setByIndex(#notDict dtd,idx,SOME nt) + fun setParEnt (dtd:Dtd) (idx,pe) = setByIndex(#parDict dtd,idx,pe) + + fun maxUsedElem (dtd:Dtd) = usedIndices(#elDict dtd)-1 + fun maxUsedGen (dtd:Dtd) = usedIndices(#genDict dtd)-1 + fun maxUsedId (dtd:Dtd) = usedIndices(#idDict dtd)-1 + + (*--------------------------------------------------------------------*) + (* initialize the attribute tables. Make sure that indices 0...3 are *) + (* assigned to "default", "preserve", "xml:lang" and "xml:space". *) + (*--------------------------------------------------------------------*) + fun initAttNotTable (dtd as {idDict,notDict,...}:Dtd) = + let + val _ = clearDict(notDict,SOME(!O_TS_ATT_NOT)) + val _ = clearDict(idDict,SOME(!O_TS_ID)) + val _ = AttNot2Index dtd default + val _ = AttNot2Index dtd preserve + val _ = AttNot2Index dtd xmlLang + val _ = AttNot2Index dtd xmlSpace + in () + end + fun initElementTable (dtd:Dtd) = clearDict(#elDict dtd,SOME(!O_TS_ELEM)) + (*--------------------------------------------------------------------*) + (* reserve 0 for gen entity -, i.e., the document entity. *) + (* reserve 1 for gen entity amp, i.e., "&#38;" *) + (* reserve 2 for gen entity lt, i.e., "&#60;" *) + (* reserve 3 for gen entity gt, i.e., ">" *) + (* reserve 4 for gen entity apos, i.e., "'" *) + (* reserve 5 for gen entity quot, i.e., """ *) + (* reserve 0 for par entity -, i.e., the external dtd subset. *) + (* *) + (* Cf. 4.1: *) + (* *) + (* ... except that well-formed documents need not declare any of *) + (* the following entities: amp, lt, gt, apos, quot. *) + (* *) + (* and 4.6: *) + (* *) + (* *) + (* *) + (* *) + (* *) + (* *) + (*--------------------------------------------------------------------*) + fun initEntityTables (dtd as {genDict,parDict,preRedef,...}:Dtd) = + let + val _ = clearDict(genDict,SOME(!O_TS_GEN_ENT)) + val _ = clearDict(parDict,SOME(!O_TS_PAR_ENT)) + val _ = map (fn i => Array.update(preRedef,i,false)) [1,2,3,4,5] + val _ = GenEnt2Index dtd [0wx2D] (* "-" *) + val _ = ParEnt2Index dtd [0wx2D] (* "-" *) + val _ = Vector.appi + (fn (_,(name,lit,cs)) + => (setGenEnt dtd (GenEnt2Index dtd name,(GE_INTERN(lit,cs),false)))) + (predefined,1,NONE) + in () + end + + fun initDtdTables() = + let + val dtd = newDtd() + val _ = initAttNotTable dtd + val _ = initElementTable dtd + val _ = initEntityTables dtd + val _ = initStandalone dtd + in dtd + end + + local + val dtd = initDtdTables() + in + val defaultIdx = AttNot2Index dtd default + val preserveIdx = AttNot2Index dtd preserve + val xmlLangIdx = AttNot2Index dtd xmlLang + val xmlSpaceIdx = AttNot2Index dtd xmlSpace + end + + fun printAttNotTable (dtd:Dtd) = + printDict NotationInfo2String (#notDict dtd) + fun printElementTable dtd = + printDict (ElemInfo2xString (UniChar.Data2String o (Index2AttNot dtd), + UniChar.Data2String o (Index2Element dtd), + UniChar.Data2String o (Index2GenEnt dtd), + UniChar.Data2String o (Index2Id dtd), + UniChar.Data2String o (Index2AttNot dtd))) (#elDict dtd) + fun printGenEntTable dtd = + printDict (fn (ent,ext) => GenEntity2xString (Data2String o (Index2AttNot dtd)) ent + ^(if ext then "[external]" else "")) (#genDict dtd) + fun printIdTable (dtd:Dtd) = printDict (IdInfo2String) (#idDict dtd) + fun printParEntTable (dtd:Dtd) = + printDict (fn (ent,ext) => ParEntity2String ent + ^(if ext then "[external]" else "")) (#parDict dtd) + + fun printDtdTables dtd = (printAttNotTable dtd; + printElementTable dtd; + printGenEntTable dtd; + printIdTable dtd; + printParEntTable dtd) + end diff -uNr fxp-2.0.orig/src/Parser/Params/dtd.sml fxp-2.0/src/Parser/Params/dtd.sml --- fxp-2.0.orig/src/Parser/Params/dtd.sml Sat Jun 26 02:42:53 2004 +++ fxp-2.0/src/Parser/Params/dtd.sml Thu Nov 1 08:59:04 2007 @@ -289,10 +289,10 @@ val _ = map (fn i => Array.update(preRedef,i,false)) [1,2,3,4,5] val _ = GenEnt2Index dtd [0wx2D] (* "-" *) val _ = ParEnt2Index dtd [0wx2D] (* "-" *) - val _ = Vector.appi - (fn (_,(name,lit,cs)) - => (setGenEnt dtd (GenEnt2Index dtd name,(GE_INTERN(lit,cs),false)))) - (predefined,1,NONE) + val _ = VectorSlice.appi + (fn (_,(name,lit,cs)) + => (setGenEnt dtd (GenEnt2Index dtd name,(GE_INTERN(lit,cs),false)))) + (VectorSlice.slice (predefined,1,NONE)) in () end diff -uNr fxp-2.0.orig/src/Parser/Params/params.cm fxp-2.0/src/Parser/Params/params.cm --- fxp-2.0.orig/src/Parser/Params/params.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Params/params.cm Thu Nov 1 08:59:04 2007 @@ -0,0 +1,32 @@ +Group + signature Dtd + structure Dtd + + signature Hooks + structure HookData + structure IgnoreHooks + + signature ParserOptions + functor ParserOptions + + signature Resolve + structure ResolveNull +is +#if (SMLNJ_MINOR_VERSION > 40) + dtd.sml + $/basis.cm +#else + dtd.orig.sml +#endif + hookData.sml + hooks.sml + ignore.sml + parserOptions.sml + resolve.sml + + ../Dfa/dfa.cm + ../Error/error.cm + ../Base/base.cm + ../../Util/util.cm + ../../Unicode/unicode.cm + diff -uNr fxp-2.0.orig/src/Parser/Parse/parse.cm fxp-2.0/src/Parser/Parse/parse.cm --- fxp-2.0.orig/src/Parser/Parse/parse.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Parse/parse.cm Thu Nov 1 08:59:03 2007 @@ -0,0 +1,30 @@ +Group + functor Parse +is + parseBase.sml +#if (SMLNJ_MINOR_VERSION > 40) + parseContent.sml + $/basis.cm +#else + parseContent.orig.sml +#endif + parseDecl.sml + parseDocument.sml + parseDtd.sml + parseLiterals.sml + parseMisc.sml + parseNames.sml + parseRefs.sml + parseTags.sml + parseXml.sml + + ../Base/base.cm + ../Error/error.cm + ../Dfa/dfa.cm + ../Dtd/dtd.cm + ../Params/params.cm + ../../Unicode/unicode.cm + ../../Util/util.cm + ../../Util/SymDict/symdict.cm + + diff -uNr fxp-2.0.orig/src/Parser/Parse/parseBase.sml fxp-2.0/src/Parser/Parse/parseBase.sml --- fxp-2.0.orig/src/Parser/Parse/parseBase.sml Sat Jun 26 02:42:54 2004 +++ fxp-2.0/src/Parser/Parse/parseBase.sml Thu Nov 1 18:16:09 2007 @@ -1,6 +1,10 @@ signature ParseBase = sig - include Dfa DtdManager Resolve DfaOptions ParserOptions + include Dfa + include DtdManager + include Resolve + include DfaOptions + include ParserOptions exception NoSuchChar of AppData * State exception NoSuchEntity of AppData * State diff -uNr fxp-2.0.orig/src/Parser/Parse/parseContent.orig.sml fxp-2.0/src/Parser/Parse/parseContent.orig.sml --- fxp-2.0.orig/src/Parser/Parse/parseContent.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/Parse/parseContent.orig.sml Thu Nov 1 08:59:03 2007 @@ -0,0 +1,917 @@ +signature ParseContent = + sig + (*---------------------------------------------------------------------- + include ParseBase + + val parseName : UniChar.Char * AppData * State + -> UniChar.Data * (UniChar.Char * AppData * State) + + val openDocument : Uri.Uri option -> AppData + -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State) + + val skipCharRef : AppData * State -> (UniChar.Char * AppData * State) + val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) + + val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) + val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) + + val skipTag : Errors.Location -> AppData * State -> (UniChar.Char * AppData * State) + val parseSTag : Dtd -> Errors.Position -> UniChar.Char * AppData * State + -> (HookData.StartTagInfo * Base.ElemInfo) * (UniChar.Char * AppData * State) + + val skipDecl : bool -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State + + val parseDocTypeDecl : Dtd -> (UniChar.Char * AppData * State) + -> int option * (UniChar.Char * AppData * State) + ----------------------------------------------------------------------*) + include ParseDtd + + val skipBadSection : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) + + val parseElement : Dtd * int list * State * (HookData.StartTagInfo * Base.ElemInfo) + * (UniChar.Char * AppData * State) + -> (int * UniChar.Data * Errors.Position * Errors.Position) option + * (UniChar.Char * AppData * State) + end + +(*--------------------------------------------------------------------------*) +(* Structure: ParseContent *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* skipBadSection : none *) +(* parseElement : none *) +(*--------------------------------------------------------------------------*) +functor ParseContent (structure ParseBase : ParseBase) + : ParseContent = +struct + structure ParseDtd = ParseDtd (structure ParseBase = ParseBase) + + open + Base Errors UniChar UniClasses UtilList + ParseDtd + + val THIS_MODULE = "ParseContent" + val DATA_BUFSIZE = 1024 + val dataBuffer = Array.array(DATA_BUFSIZE,0w0:UniChar.Char) + + (*--------------------------------------------------------------------*) + (* skip a cdata section, the initial "' Char* )) [[ *) + (* [21] CDEnd ::= ']]>' *) + (* *) + (* don't care abeout whether "CDATA[" is present. just skip until the *) + (* next "]]>" or entity end. *) + (* *) + (* return the remaining char and state. *) + (*--------------------------------------------------------------------*) + (* might raise: none *) + (*--------------------------------------------------------------------*) + fun skipBadSection caq = + let(*--------------------------------------------------------------*) + (* for a sequence of "]"s, check whether the last two are *) + (* followed by a ">" *) + (*--------------------------------------------------------------*) + fun checkEnd aq = + let val (c1,a1,q1) = getChar aq + in case c1 + of 0wx3E (* #">" *) => getChar(a1,q1) + | 0wx5D (* #"]" *) => checkEnd(a1,q1) + | _ => doit(c1,a1,q1) + end + and doit (c,a,q) = + case c + of 0wx00 => let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_CDATA)) + in (c,a1,q) + end + | 0wx5D (* #"]" *) => let val (c1,a1,q1) = getChar(a,q) + in if c1=0wx5D (* #"]" *) then checkEnd(a1,q1) + else doit (c1,a1,q1) + end + | _ => doit (getChar(a,q)) + in doit caq + end + + (*--------------------------------------------------------------------*) + (* parse a cdata section, the initial "' Char* )) [[ *) + (* [21] CDEnd ::= ']]>' *) + (* *) + (* print an error and finish if an entity end is found. *) + (* *) + (* return the data as a Vector option and the next char & state. *) + (*--------------------------------------------------------------------*) + (* might raise: none *) + (*--------------------------------------------------------------------*) + fun parseCDataSection' (aq as (_,q)) = + let + (*--------------------------------------------------------------*) + (* for a sequence of "]"s, check whether the last two are *) + (* followed by a ">" *) + (*--------------------------------------------------------------*) + fun doEnd (text,q0,q1) (a2,q2) = + let val (c3,a3,q3) = getChar (a2,q2) + in case c3 + of 0wx00 => + let val a4 = hookError(a3,(getPos q3,ERR_ENDED_BY_EE LOC_CDATA)) + in (0wx5D::text,getPos q2,(c3,a4,q3)) + end + | 0wx3E => (* #">" *) (text,getPos q0,getChar(a3,q3)) + | 0wx5D => doEnd (0wx5D::text,q1,q2) (a3,q3) + | _ => doit (c3::0wx5D::0wx5D::text) (a3,q3) + end + and doBrack (text,q0) (a1,q1) = + let val (c2,a2,q2) = getChar(a1,q1) + in case c2 + of 0wx00 => + let val a3 = hookError(a2,(getPos q2,ERR_ENDED_BY_EE LOC_CDATA)) + in (0wx5D::text,getPos q1,(c2,a3,q2)) + end + | 0wx5D (* #"]" *) => doEnd (text,q0,q1) (a2,q2) + | _ => doit (c2::0wx5D::text) (a2,q2) + end + and doit text (a,q) = + let val (c1,a1,q1) = getChar(a,q) + in case c1 + of 0wx00 => + let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_CDATA)) + in (text,getPos q,(c1,a2,q1)) + end + | 0wx5D (* #"]" *) => doBrack (text,q) (a1,q1) + | _ => doit (c1::text) (a1,q1) + end + val (c1,a1,q1) = getChar aq + val startPos = getPos q1 + val (cs,endPos,(c2,a2,q2)) = + case c1 + of 0wx00 => + let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_CDATA)) + in (nil,getPos q,(c1,a2,q1)) + end + | 0wx5D (* #"]" *) => doBrack (nil,q) (a1,q1) + | _ => doit [c1] (a1,q1) + val text = Data2Vector(rev cs) + val a3 = hookCData(a1,((startPos,endPos),text)) + in (c2,a3,q2) + end + (*--------------------------------------------------------------------*) + (* parse a cdata section, the initial "' Char* )) [[ *) + (* [21] CDEnd ::= ']]>' *) + (* *) + (* print an error and skip the section if no name or a name other *) + (* than CDATA comes first, or no '[' follows the name. *) + (* *) + (* return the text of the section together with the remaining state. *) + (*--------------------------------------------------------------------*) + (* might raise: none *) + (*--------------------------------------------------------------------*) + fun parseCDataSection startPos aq = + let + val caq0 as (_,_,q0) = (getChar aq) + val (name,(c1,a1,q1)) = parseName caq0 + handle NotFound (c,a,q) => let val err = expectedOrEnded(expCdata,LOC_CDATA) c + in raise SyntaxError(c,hookError(a,(getPos q,err)),q) + end + + val _ = if name = [0wx43,0wx44,0wx41,0wx54,0wx41] (* "CDATA" *) then () + else let val err = ERR_EXPECTED(expCdata,name) + in raise SyntaxError(c1,hookError(a1,(getPos q0,err)),q1) + end + + val _ = if c1=0wx5B (* #"[" *) then () + else let val err = expectedOrEnded(expLbrack,LOC_CDATA) c1 + in raise SyntaxError(c1,hookError(a1,(getPos q1,err)),q1) + end + in + parseCDataSection'(a1,q1) + end + handle SyntaxError caq => skipBadSection caq + + (*--------------------------------------------------------------------*) + (* parse element or empty content. The second arg holds the unique *) + (* number of the element's first characters's entity, the index of *) + (* the current element, and the dfa for its content. Cf. 3: *) + (* *) + (* [39] element ::= EmptyElemTag *) + (* | STag content ETag *) + (* ... *) + (* Well-Formedness Constraint: Element Type Match *) + (* The Name in an element's end-tag must match the element type in *) + (* the start-tag. *) + (* *) + (* Validity Constraint: Element Valid *) + (* An element is valid if there is a declaration matching *) + (* elementdecl where the Name matches the element type, and one of *) + (* the following holds: *) + (* *) + (* 1. The declaration matches EMPTY and the element has no content. *) + (* 2. The declaration matches children and the sequence of child *) + (* elements belongs to the language generated by the regular *) + (* expression in the content model, with optional white space *) + (* (characters matching the nonterminal S) between each pair of *) + (* child elements. *) + (* *) + (* and 3.1: *) + (* *) + (* [43] content ::= (element | CharData | Reference | CDSect | PI *) + (* | Comment)* *) + (* 2.4: *) + (* The ampersand character (&) and the left angle bracket (<) may *) + (* appear in their literal form only when used as markup delimiters,*) + (* or within a comment, a processing instruction, or a CDATA *) + (* section... If they are needed elsewhere, they must be escaped *) + (* using either numeric character references or the strings "&" *) + (* and "<" respectively... *) + (* *) + (* consume the content of the element, accumulating it via the user *) + (* data functions (parameter a in subfunctions). trace the content *) + (* model of the element with a dfa transitions on a dfa state (para- *) + (* meter p in subfunctions). finish at the first end-tag, whether *) + (* matching or not, or at the document end. *) + (* *) + (* handle all syntax and other recoverable errors from subfunctions *) + (* and try to continue. *) + (* *) + (* return the accumulated user data and the next char and state. *) + (*--------------------------------------------------------------------*) + (* might raise: none *) + (*--------------------------------------------------------------------*) + fun parseElementContent dtd (openElems,startEnt,curr,dfa,ext,mt) caq = + let + (*--------------------------------------------------------------*) + (* check whether the dfa allows a transition/an end tag here. *) + (* print an error if not. After a transition return the new *) + (* dfa state. *) + (*--------------------------------------------------------------*) + fun fin_elem (a,pos,dfa,p) = + if dfaFinal(dfa,p) then a + else hookError(a,(pos,ERR_ENDED_EARLY(Index2Element dtd curr))) + fun trans_elem (a,q,dfa,p,el) = + let val p1 = dfaTrans(dfa,p,el) + in if p1<>dfaError then (p1,a) + else let val err = ERR_BAD_ELEM(Index2Element dtd curr,Index2Element dtd el) + in (p1,hookError(a,(getPos q,err))) + end + end + + (*--------------------------------------------------------------*) + (* consume all white space and skip all data until the next "<" *) + (* or "&". print an error for each sequence of data encountered.*) + (* *) + (* add the white space as data to the user data. *) + (* return the next char and state. *) + (*--------------------------------------------------------------*) + fun do_char_elem (c0,a0,q0) = + let + (*--------------------------------------------------------------*) + (* read data characters until the next "<", "&" or entity end. *) + (* add the data to the user data when an error occurs or no *) + (* more data follows. *) + (* *) + (* return the modified user data with the next char and state. *) + (*--------------------------------------------------------------*) + fun data_hook(a,q,cs) = + if null cs then a + else hookData(a,((getPos q0,getPos q),Data2Vector(rev cs),true)) + fun after_error (caq as (c,a,q)) = + case c + of 0wx00 => caq + | 0wx26 (* #"&" *) => caq + | 0wx3C (* #"<" *) => caq + | _ => after_error(getChar(a,q)) + fun do_data (yet,aq as (_,q)) = + let val (c1,a1,q1) = getChar aq + in case c1 + of 0wx00 => (c1,data_hook(a1,q,yet),q1) + | 0wx26 (* #"&" *) => (c1,data_hook(a1,q,yet),q1) + | 0wx3C (* #"<" *) => (c1,data_hook(a1,q,yet),q1) + | _ => + if isS c1 then do_data (c1::yet,(a1,q1)) + else let val a2 = data_hook(a1,q,yet) + val err = ERR_ELEM_CONTENT(IT_DATA nil) + val a3 = hookError(a2,(getPos q1,err)) + in after_error (getChar(a3,q1)) + end + end + in + if isS c0 then + let val a1 = if not (ext andalso standsAlone dtd) then a0 + else let val err = ERR_STANDALONE_ELEM(Index2Element dtd curr) + val _ = setStandAlone dtd (not (!O_ERROR_MINIMIZE)) + in hookError(a0,(getPos q0,err)) + end + in do_data ([c0],(a1,q0)) + end + else let val a1 = hookError(a0,(getPos q0,ERR_ELEM_CONTENT(IT_DATA nil))) + in after_error(getChar(a1,q0)) + end + end + (*--------------------------------------------------------------*) + (* consume a reference, handling errors by ignoring them. *) + (*--------------------------------------------------------------*) + fun do_ref (q,(c1,a1,q1)) = + if c1=0wx23 (* #"#" *) + (*------------------------------------------------------*) + (* it's a character reference. *) + (*------------------------------------------------------*) + then let val err = ERR_ELEM_CONTENT IT_CHAR_REF + val a2 = hookError(a1,(getPos q,err)) + in skipCharRef(a2,q1) + end + (*---------------------------------------------------------*) + (* it's a general entity reference. *) + (*---------------------------------------------------------*) + else let val ((id,ent),(a2,q2)) = parseGenRef dtd (c1,a1,q1) + in case ent + of GE_NULL => + let val a3 = hookGenRef(a2,((getPos q,getPos q2),id,ent,false)) + in (getChar(a3,q2)) + end + | GE_INTERN(_,rep) => + let + val q3 = pushIntern(q2,id,false,rep) + val a3 = hookGenRef(a2,((getPos q,getPos q2),id,ent,true)) + in (getChar(a3,q3)) + end + | GE_EXTERN ext => + if !O_VALIDATE orelse !O_INCLUDE_EXT_PARSED + then + let + val a3 = hookGenRef(a2,((getPos q,getPos q2),id,ent,true)) + val caq4 = #3(openExtern (id,false,resolveExtId ext) (a3,q2)) + handle CantOpenFile(fmsg,a) + => let val err = ERR_NO_SUCH_FILE fmsg + val a2 = hookError(a,(getPos q2,err)) + val a3 = hookEntEnd(a2,getPos q2) + in (getChar(a3,q2)) + end + in caq4 + end + else let val a3 = hookGenRef(a2,((getPos q,getPos q2),id,ent,false)) + in getChar(a3,q2) + end + | GE_UNPARSED _ => + raise InternalError + (THIS_MODULE,"parseElementContent", + "parseGenRef returned GE_UNPARSED") + end + (*-------------------------------------------------------*) + (* handle any errors in references by ignoring them. *) + (*-------------------------------------------------------*) + handle SyntaxError caq => caq + | NoSuchEntity aq => getChar aq + + (*--------------------------------------------------------------*) + (* handle an end-tag. finish the element in the user data and *) + (* return. *) + (* *) + (* print an error if the element's content is not yet finished. *) + (* print an error if the end-tag is for another element. *) + (* print an error if the element's first character was not in *) + (* the same entity. *) + (*--------------------------------------------------------------*) + and do_etag (p,etag as (elem,space,startPos,endPos),(c,a,q)) = + let + fun checkNesting a = + if getEntId q=startEnt then a + else hookError(a,(startPos,ERR_ELEM_ENT_NESTING(Index2Element dtd curr))) + in + if elem=curr then let val a1 = fin_elem (a,startPos,dfa,p) + val a2 = checkNesting a1 + val a3 = hookEndTag + (a2,((startPos,endPos),curr,SOME(elem,space))) + in (NONE,(c,a3,q)) + end + else if member elem openElems + then let val err = ERR_OMITTED_END_TAG(Index2Element dtd curr) + val a1 = hookError(a,(startPos,err)) + val a2 = fin_elem (a1,startPos,dfa,p) + val a3 = hookEndTag(a2,((startPos,endPos),curr,NONE)) + in (SOME etag,(c,a3,q)) + end + else if dfaFinal(dfa,p) + then let val err = ERR_ELEM_TYPE_MATCH(Index2Element dtd curr, + Index2Element dtd elem) + val a1 = hookError(a,(startPos,err)) + val a2 = checkNesting a1 + val a3 = hookEndTag(a2,((startPos,endPos),curr,SOME(elem,space))) + in (NONE,(c,a3,q)) + end + else let val err = ERR_IGNORED_END_TAG(Index2Element dtd curr, + Index2Element dtd elem) + val a1 = hookError(a,(startPos,err)) + in do_elem(p,(c,a1,q)) + end + end + + (*--------------------------------------------------------------*) + (* handle a declaration, proc. instr or tag. *) + (*--------------------------------------------------------------*) + and do_lt (p,q,(c1,a1,q1)) = + case c1 + of 0wx21 (* #"!" *) => + (*------------------------------------------------------*) + (* its a declaration, cdata section or comment. *) + (* Only comments are valid. *) + (*------------------------------------------------------*) + let val (c2,a2,q2) = getChar(a1,q1) + val caq3 = + case c2 + of 0wx2D (* #"-" *) => + let val (c3,a3,q3) = getChar(a2,q2) + in if c3=0wx2D then parseComment (getPos q) (a3,q3) + else let val err = ERR_EXPECTED(expDash,[c3]) + val a4 = hookError(a3,(getPos q3,err)) + in recoverDecl false (c3,a4,q3) + end + end + | 0wx5B (* #"[" *) => + let val a3 = hookError(a2,(getPos q2,ERR_ELEM_CONTENT IT_CDATA)) + in skipBadSection (getChar(a3,q2)) + end + | _ => (c2,hookError(a2,(getPos q2,ERR_EXPECTED(expDash,[c2]))),q2) + in do_elem(p,caq3) + end + | 0wx2F (* #"/" *) => + (let val (elem,space,endPos,caq2) = parseETag dtd (a1,q1) + in do_etag (p,(elem,space,getPos q,endPos),caq2) + end + handle SyntaxError caq => do_elem(p,caq)) + | 0wx3F (* #"?" *) => do_elem (p,parseProcInstr (getPos q) (a1,q1)) + | _ => + (*------------------------------------------------------*) + (* it's a start tag. the recursive call to parseElement *) + (* might return an end-tag that has to be consumed. *) + (*------------------------------------------------------*) + if isNms c1 then + let val (p1,(opt,caq2)) = + (let val (stag as ((_,elem,_,_,_),_),(c2,a2,q2)) = + parseSTag dtd (getPos q) (c1,a1,q1) + val (p1,a3) = trans_elem (a2,q1,dfa,p,elem) + in (p1,parseElement (dtd,curr::openElems,q,stag,(c2,a3,q2))) + end) + handle SyntaxError caq => (p,(NONE,caq)) + in case opt + of NONE => do_elem (p1,caq2) + | SOME etag => do_etag (p1,etag,caq2) + end + else let val err = ERR_FORBIDDEN_HERE(IT_CHAR 0wx3C,LOC_CONTENT) + val a2 = hookError(a1,(getPos q,err)) + in do_elem (p,(c1,a2,q1)) + end + + (*--------------------------------------------------------------*) + (* do element content. handle the document end by printing an *) + (* error and finishing like with an end-tag. *) + (*--------------------------------------------------------------*) + and do_elem (p,(c,a,q)) = + case c + of 0wx00 => if isSpecial q + then let val err = ERR_OMITTED_END_TAG(Index2Element dtd curr) + val a1 = hookError(a,(getPos q,err)) + val pos = getPos q + val a2 = fin_elem (a1,pos,dfa,p) + val a3 = hookEndTag(a2,((pos,pos),curr,NONE)) + in (NONE,(c,a3,q)) + end + else let val a1 = hookEntEnd(a,getPos q) + in do_elem (p,getChar(a1,q)) + end + | 0wx26 (* #"&" *) => do_elem (p,do_ref (q,getChar(a,q))) + | 0wx3C (* #"<" *) => do_lt (p,q,getChar(a,q)) + | _ => do_elem (p,do_char_elem (c,a,q)) + + (*--------------------------------------------------------------*) + (* do empty content. if the first thing to come is the current *) + (* element's end-tag, finish it. Otherwise print an error and *) + (* continue as for element content. *) + (*--------------------------------------------------------------*) + and do_empty (c,a,q) = + if c<>0wx3C (* #"<" *) + then let val a1 = hookError(a,(getPos q,ERR_NONEMPTY(Index2Element dtd curr))) + in do_elem (dfaInitial,(c,a1,q)) + end + else + let val (c1,a1,q1) = getChar(a,q) + in if c1<>0wx2F (* #"/" *) + then let val err = ERR_NONEMPTY(Index2Element dtd curr) + val a2 = hookError(a1,(getPos q,err)) + in do_lt (dfaInitial,q,(c1,a2,q1)) + end + else let val (elem,space,endPos,caq2) = parseETag dtd (a1,q1) + in do_etag (dfaInitial,(elem,space,getPos q,endPos),caq2) + end + handle SyntaxError caq => do_elem (dfaInitial,caq) + end + + in if mt then do_empty caq + else do_elem (dfaInitial,caq) + end + + (*--------------------------------------------------------------------*) + (* parse mixed or any content. The second arg holds the unique number *) + (* of the element's first characters's entity, the idx of the current *) + (* element, and a function for validating child elements. Cf. 3: *) + (* *) + (* [39] element ::= EmptyElemTag *) + (* | STag content ETag *) + (* ... *) + (* Well-Formedness Constraint: Element Type Match *) + (* The Name in an element's end-tag must match the element type in *) + (* the start-tag. *) + (* *) + (* Validity Constraint: Element Valid *) + (* An element is valid if there is a declaration matching *) + (* elementdecl where the Name matches the element type, and one of *) + (* the following holds: *) + (* ... *) + (* 3. The declaration matches Mixed and the content consists of *) + (* character data and child elements whose types match names in *) + (* the content model. *) + (* 4. The declaration matches ANY, and the types of any child *) + (* elements have been declared. *) + (* *) + (* 3.1: *) + (* *) + (* [43] content ::= (element | CharData | Reference | CDSect | PI *) + (* | Comment)* *) + (* 2.4: *) + (* The ampersand character (&) and the left angle bracket (<) may *) + (* appear in their literal form only when used as markup delimiters,*) + (* or within a comment, a processing instruction, or a CDATA *) + (* section... If they are needed elsewhere, they must be escaped *) + (* using either numeric character references or the strings "&" *) + (* and "<" respectively. The right angle bracket (>) may be *) + (* represented using the string ">", and must, for compatibility,*) + (* be escaped using ">" or a character reference when it appears *) + (* in the string "]]>" in content, when that string is not marking *) + (* the end of a CDATA section. *) + (* *) + (* consume the content of the element, accumulating it via the user *) + (* data functions (parameter a in subfunctions). for each child, *) + (* check whether it was specified in the element's Mixed content *) + (* specification (validate). finish at the first end-tag, whether *) + (* matching or not, or at the document end. *) + (* *) + (* handle all syntax and other recoverable errors from subfunctions *) + (* and try to continue. *) + (* *) + (* return the accumulated user data and the next char and state. *) + (*--------------------------------------------------------------------*) + (* might raise: none *) + (*--------------------------------------------------------------------*) + and parseMixedContent dtd (openElems,startEnt,curr,validate) caq = + let + (*--------------------------------------------------------------*) + (* read data characters until the next "<", "&" or entity end. *) + (* add the data to the user data when an error occurs or no *) + (* more data follows. *) + (* *) + (* return the modified user data with the next char and state. *) + (*--------------------------------------------------------------*) + fun do_data (br,(c0,a0,q0)) = + let + val pos0 = ref (getPos q0) + val _ = Array.update(dataBuffer,0,c0) + + fun data_hook (i,(a,q)) = + hookData(a,((!pos0,getPos q),Array.extract(dataBuffer,0,SOME i),false)) + fun takeOne (c,qE,i,aq as (a,q)) = + if i (c1,data_hook(i,(a1,q)),q1) + | 0wx26 (* #"&" *) => (c1,data_hook(i,(a1,q)),q1) + | 0wx3C (* #"<" *) => (c1,data_hook(i,(a1,q)),q1) + | 0wx5D (* #"]" *) => do_br (n+1,takeOne(c1,q,i,(a1,q1))) + | 0wx3E (* #">" *) => + let val a2 = if n=1 then a1 + else hookError(a1,(getPos q1,ERR_MUST_ESCAPE c1)) + in doit (takeOne(c1,q,i,(a2,q1))) + end + | _ => doit (takeOne(c1,q,i,(a1,q1))) + end + and doit (i,aq as (_,q)) = + let val (c1,a1,q1) = getChar aq + in case c1 + of 0wx00 => (c1,data_hook(i,(a1,q)),q1) + | 0wx26 (* #"&" *) => (c1,data_hook(i,(a1,q)),q1) + | 0wx3C (* #"<" *) => (c1,data_hook(i,(a1,q)),q1) + | 0wx5D (* #"]" *) => if !O_COMPATIBILITY + then do_br (1,takeOne(c1,q,i,(a1,q1))) + else doit (takeOne(c1,q,i,(a1,q1))) + | _ => doit (takeOne(c1,q,i,(a1,q1))) + end + in + if br then do_br (1,(1,(a0,q0))) + else doit (1,(a0,q0)) + end + (* + fun do_data (br,(c0,a0,q0)) = + let + fun data_hook (yet,(a,q)) = + hookData(a,((getPos q0,getPos q),Data2Vector(rev yet),false)) + fun do_br (n,yet,aq as (_,q)) = + let val (c1,a1,q1) = getChar aq + in case c1 + of 0wx00 => (c1,data_hook(yet,(a1,q)),q1) + | 0wx26 (* #"&" *) => (c1,data_hook(yet,(a1,q)),q1) + | 0wx3C (* #"<" *) => (c1,data_hook(yet,(a1,q)),q1) + | 0wx5D (* #"]" *) => do_br (n+1,c1::yet,(a1,q1)) + | 0wx3E (* #">" *) => + let val a2 = if n=1 then a1 + else hookError(a1,(getPos q1,ERR_MUST_ESCAPE c1)) + in doit (c1::yet,(a2,q1)) + end + | _ => doit (c1::yet,(a1,q1)) + end + and doit (yet,aq as (_,q)) = + let val (c1,a1,q1) = getChar aq + in case c1 + of 0wx00 => (c1,data_hook(yet,(a1,q)),q1) + | 0wx26 (* #"&" *) => (c1,data_hook(yet,(a1,q)),q1) + | 0wx3C (* #"<" *) => (c1,data_hook(yet,(a1,q)),q1) + | 0wx5D (* #"]" *) => if !O_COMPATIBILITY + then do_br (1,c1::yet,(a1,q1)) + else doit (c1::yet,(a1,q1)) + | _ => doit (c1::yet,(a1,q1)) + end + in + if br then do_br (1,[0wx5D],(a0,q0)) + else doit ([c0],(a0,q0)) + end + *) + + (*--------------------------------------------------------------*) + (* consume a reference, handling errors by ignoring them. *) + (*--------------------------------------------------------------*) + fun do_ref (q0,(c,a,q)) = + if c=0wx23 (* #"#" *) + (*------------------------------------------------------*) + (* it's a character reference. *) + (*------------------------------------------------------*) + then let val (cs,(ch,a1,q1)) = parseCharRefLit [0wx23,0wx26] (a,q) + val cv = Data2Vector(rev cs) + val a2 = hookCharRef(a1,((getPos q0,getPos q1),ch,cv)) + in getChar(a2,q1) + end + handle SyntaxError caq => caq + | NoSuchChar aq => getChar aq + (*---------------------------------------------------------*) + (* it's a general entity reference. *) + (*---------------------------------------------------------*) + else let val ((id,ent),(a1,q1)) = parseGenRef dtd (c,a,q) + in case ent + of GE_NULL => + let val a2 = hookGenRef(a1,((getPos q0,getPos q1),id,ent,false)) + in getChar(a2,q1) + end + | GE_INTERN(_,rep) => + let + val q2 = pushIntern(q1,id,false,rep) + val a2 = hookGenRef(a1,((getPos q0,getPos q1),id,ent,true)) + in getChar(a2,q2) + end + | GE_EXTERN ext => + if !O_VALIDATE orelse !O_INCLUDE_EXT_PARSED + then + let + val a2 = hookGenRef(a1,((getPos q0,getPos q1),id,ent,true)) + val caq3 = #3(openExtern (id,false,resolveExtId ext) (a2,q1)) + handle CantOpenFile(fmsg,a) + => let val err = ERR_NO_SUCH_FILE fmsg + val a1 = hookError(a,(getPos q1,err)) + val a2 = hookEntEnd(a1,getPos q1) + in (getChar(a2,q1)) + end + in caq3 + end + else let val a2 = hookGenRef(a1,((getPos q0,getPos q1),id,ent,false)) + in getChar(a2,q1) + end + | GE_UNPARSED _ => + raise InternalError + ("THIS_MODULE","parseMixedContent", + "parseGenRef returned GE_UNPARSED") + end + (*-------------------------------------------------------*) + (* handle any errors in references by ignoring them. *) + (*-------------------------------------------------------*) + handle SyntaxError caq => caq + | NoSuchEntity aq => getChar aq + + (*--------------------------------------------------------------*) + (* handle an end-tag. finish the element in the user data and *) + (* return. *) + (* *) + (* print an error if the element's content is not yet finished. *) + (* print an error if the end-tag is for another element. *) + (* print an error if the element's first character was not in *) + (* the same entity. *) + (*--------------------------------------------------------------*) + and do_etag (etag as (elem,space,startPos,endPos),(c,a,q)) = + let + fun checkNesting a = + if getEntId q=startEnt then a + else hookError(a,(startPos,ERR_ELEM_ENT_NESTING(Index2Element dtd curr))) + in + if elem=curr then let val a1 = checkNesting a + val a2 = hookEndTag + (a1,((startPos,endPos),curr,SOME(elem,space))) + in (NONE,(c,a2,q)) + end + else if member elem openElems + then let val err = ERR_OMITTED_END_TAG(Index2Element dtd curr) + val a1 = hookError(a,(startPos,err)) + val a2 = hookEndTag(a1,((startPos,endPos),curr,NONE)) + in (SOME etag,(c,a2,q)) + end + else let val err = ERR_ELEM_TYPE_MATCH(Index2Element dtd curr, + Index2Element dtd elem) + val a1 = hookError(a,(startPos,err)) + val a2 = checkNesting a1 + val a3 = hookEndTag(a2,((startPos,endPos),curr,SOME(elem,space))) + in (NONE,(c,a3,q)) + end + end + + (*--------------------------------------------------------------*) + (* handle a declaration, proc. instr or tag. If it is an end- *) + (* tag, finish the element in the user data and return. *) + (* *) + (* print an error if the element's content is not yet finished. *) + (* print an error if the end-tag is for another element. *) + (* print an error if the element's first character was not in *) + (* the same entity. *) + (*--------------------------------------------------------------*) + and do_lt (q,(c1,a1,q1)) = + case c1 + of 0wx21 (* #"!" *) => + (*------------------------------------------------------*) + (* its a declaration, cdata section or comment. *) + (* Only comments and cdata sections are valid. *) + (*------------------------------------------------------*) + let val (c2,a2,q2) = getChar(a1,q1) + val caq3 = + case c2 + of 0wx2D (* #"-" *) => + let val (c3,a3,q3) = getChar(a2,q2) + in if c3=0wx2D then parseComment (getPos q) (a3,q3) + else let val err = ERR_EXPECTED(expDash,[c3]) + val a4 = hookError(a3,(getPos q3,err)) + in recoverDecl false (c3,a4,q3) + end + end + | 0wx5B (* #"[" *) => parseCDataSection (getPos q) (a2,q2) + | _ => + (c2,hookError(a2,(getPos q2,ERR_EXPECTED(expDashLbrack,[c2]))),q2) + in do_mixed caq3 + end + | 0wx2F (* #"/" *) => + (let val (elem,space,endPos,caq2) = parseETag dtd (a1,q1) + in do_etag ((elem,space,getPos q,endPos),caq2) + end + handle SyntaxError caq => do_mixed caq) + | 0wx3F (* #"?" *) => do_mixed (parseProcInstr (getPos q) (a1,q1)) + | _ => + (*------------------------------------------------------*) + (* it's a start tag. the recursive call to parseElement *) + (* might return an end-tag that has to be consumed. *) + (*------------------------------------------------------*) + if isNms c1 then + let val (opt,caq2) = + (let val (stag as ((_,elem,_,_,_),_),(c2,a2,q2)) = + parseSTag dtd (getPos q) (c1,a1,q1) + val a3 = validate (a2,q1) elem + in parseElement (dtd,curr::openElems,q,stag,(c2,a3,q2)) + end + handle SyntaxError caq => (NONE,caq)) + in case opt + of NONE => do_mixed caq2 + | SOME etag => do_etag (etag,caq2) + end + else let val err = ERR_FORBIDDEN_HERE(IT_CHAR 0wx3C,LOC_CONTENT) + val a2 = hookError(a1,(getPos q,err)) + in do_mixed (c1,a2,q1) + end + + (*--------------------------------------------------------------*) + (* do mixed content. handle the document end by printing an *) + (* error and finishing like with an end-tag. *) + (*--------------------------------------------------------------*) + and do_mixed (c,a,q) = + case c + of 0wx00 => if isSpecial q + then let val err = ERR_OMITTED_END_TAG(Index2Element dtd curr) + val a1 = hookError(a,(getPos q,err)) + val pos = getPos q + val a2 = hookEndTag(a1,((pos,pos),curr,NONE)) + in (NONE,(c,a2,q)) + end + else let val a1 = hookEntEnd(a,getPos q) + in do_mixed (getChar(a1,q)) + end + | 0wx26 (* #"&" *) => do_mixed (do_ref (q,getChar(a,q))) + | 0wx3C (* #"<" *) => do_lt (q,getChar(a,q)) + | 0wx5D => do_mixed (do_data (!O_COMPATIBILITY,(c,a,q))) + | _ => do_mixed (do_data (false,(c,a,q))) + in + do_mixed caq + end + + (*--------------------------------------------------------------------*) + (* parse an element, the start tag already read. the second arg holds *) + (* the number of the entity of the start-tag's first char, and the *) + (* start-tag information. The 1st arg is the start value for the user *) + (* data. 3: *) + (* *) + (* [39] element ::= EmptyElemTag *) + (* | STag content ETag *) + (* and 3.1: *) + (* *) + (* Empty-element tags may be used for any element which has no *) + (* content, whether or not it is declared using the keyword EMPTY. *) + (* For interoperability, the empty-element tag must be used, and *) + (* can only be used, for elements which are declared EMPTY. *) + (*--------------------------------------------------------------------*) + and parseElement (dtd,openElems,q0,(stag as (_,curr,_,_,mt),elemInfo),(c,a,q)) = + let + (*--------------------------------------------------------------*) + (* validate whether an element is allowed in mixed/any content. *) + (*--------------------------------------------------------------*) + fun trans_any (a,_) _ = a + fun trans_mixed is (a,q) i = + if member i is then a + else let val err = ERR_BAD_ELEM(Index2Element dtd curr,Index2Element dtd i) + in hookError(a,(getPos q,err)) + end + in + (*-----------------------------------------------------------*) + (* For empty-element tags, verify that the element's declar. *) + (* allows empty content. *) + (*-----------------------------------------------------------*) + if mt then + let val a1 = + if not (!O_VALIDATE andalso hasDtd dtd) then a + else + case #decl elemInfo + of (SOME(CT_EMPTY,_)) => a + | (SOME(CT_ELEMENT(_,dfa),_)) => + if not (dfaFinal(dfa,dfaInitial)) + then hookError(a,(getPos q0,ERR_EMPTY_TAG(Index2Element dtd curr))) + else if not (!O_INTEROPERABILITY) then a + else hookError + (a,(getPos q0,ERR_EMPTY_TAG_INTER (Index2Element dtd curr))) + | _ => if not (!O_INTEROPERABILITY) then a + else hookError(a,(getPos q0,ERR_EMPTY_TAG_INTER + (Index2Element dtd curr))) + in (NONE,(c,hookStartTag(a1,stag),q)) + end + (*-----------------------------------------------------------*) + (* for normal start-tags, check whether the element's decl. *) + (* requires an empty-element tag, or empty content, then *) + (* call the appropriate function that parses the content. *) + (*-----------------------------------------------------------*) + else + let val startEnt = getEntId q0 + in if !O_VALIDATE then + case getOpt(#decl elemInfo,(CT_ANY,false)) + of (CT_ANY,_) => parseMixedContent dtd + (openElems,startEnt,curr,trans_any) (c,hookStartTag(a,stag),q) + | (CT_MIXED is,_) => parseMixedContent dtd + (openElems,startEnt,curr,trans_mixed is) (c,hookStartTag(a,stag),q) + | (CT_ELEMENT(_,dfa),ext) => parseElementContent dtd + (openElems,startEnt,curr,dfa,ext,false) + (c,hookStartTag(a,stag),q) + | (CT_EMPTY,_) => + let val a1 = if not (!O_INTEROPERABILITY) then a + else let val err = ERR_MUST_BE_EMPTY(Index2Element dtd curr) + in hookError(a,(getPos q0,err)) + end + val a2 = hookStartTag(a1,stag) + in parseElementContent dtd + (openElems,startEnt,curr,emptyDfa,false,true) (c,a2,q) + end + else parseMixedContent dtd + (openElems,startEnt,curr,trans_any) (c,hookStartTag(a,stag),q) + end + end +end diff -uNr fxp-2.0.orig/src/Parser/Parse/parseContent.sml fxp-2.0/src/Parser/Parse/parseContent.sml --- fxp-2.0.orig/src/Parser/Parse/parseContent.sml Sat Jun 26 02:42:54 2004 +++ fxp-2.0/src/Parser/Parse/parseContent.sml Thu Nov 1 08:59:03 2007 @@ -587,7 +587,9 @@ val _ = Array.update(dataBuffer,0,c0) fun data_hook (i,(a,q)) = - hookData(a,((!pos0,getPos q),Array.extract(dataBuffer,0,SOME i),false)) + hookData(a,((!pos0,getPos q), + ArraySlice.vector(ArraySlice.slice(dataBuffer,0,SOME i)), + false)) fun takeOne (c,qE,i,aq as (a,q)) = if i 40) + $/basis.cm +#endif + diff -uNr fxp-2.0.orig/src/Parser/parser.cm fxp-2.0/src/Parser/parser.cm --- fxp-2.0.orig/src/Parser/parser.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Parser/parser.cm Thu Nov 1 08:59:04 2007 @@ -0,0 +1,32 @@ +Group + functor Parse + + structure Base + + signature Entities + functor Entities + structure Version + + structure DfaData + structure Errors + + signature Resolve + structure ResolveNull + signature Dtd + structure Dtd + structure HookData + structure IgnoreHooks + signature Hooks + signature ParserOptions + functor ParserOptions +is + version.sml + entities.cm + + Base/base.cm + Dfa/dfa.cm + Dtd/dtd.cm + Error/error.cm + Params/params.cm + Parse/parse.cm + diff -uNr fxp-2.0.orig/src/Unicode/Chars/charClasses.orig.sml fxp-2.0/src/Unicode/Chars/charClasses.orig.sml --- fxp-2.0.orig/src/Unicode/Chars/charClasses.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/Chars/charClasses.orig.sml Thu Nov 1 08:59:08 2007 @@ -0,0 +1,158 @@ +(*--------------------------------------------------------------------------*) +(* Structure: CharClasses *) +(* *) +(* Notes: *) +(* This implementation uses the UNSAFE array operations, and does NO *) +(* range checks. This is for efficiency reasons. *) +(* If class=makeCharClass(lo,hi) then a filed of size hi-lo+1 is allo- *) +(* cated. In order to lookup a character, first make sure it in [lo..hi], *) +(* then subtract lo before calling inCharClass! *) +(* The same holds for addChar. *) +(* *) +(* Depends on: *) +(* UniChar *) +(* UtilInt *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* addChar : none *) +(* addCharClass : none *) +(* inCharClass : none *) +(* makeCharClass : none *) +(*--------------------------------------------------------------------------*) +signature CharClasses = + sig + type CharClass + type MutableClass + type CharInterval = UniChar.Char * UniChar.Char + type CharRange = CharInterval list + + val initialize : CharInterval -> MutableClass + val finalize : MutableClass -> CharClass + + val addChar : MutableClass * UniChar.Char * UniChar.Char * UniChar.Char -> unit + val addCharRange : MutableClass * UniChar.Char * UniChar.Char * CharRange -> CharRange + + val inCharClass : UniChar.Char * CharClass -> bool + end + +structure CharClasses : CharClasses = + struct + open UniChar + + type CharInterval = Char * Char + type CharRange = CharInterval list + + val Char2Word = Word.fromLargeWord o Chars.toLargeWord + + (*--------------------------------------------------------------------*) + (* helpers *) + (*--------------------------------------------------------------------*) + infix 5 >> >>> <<< + infix 6 || ||| + infix 6 -- + infix 7 & && &&& + val op >> = Chars.>> + val op -- = Chars.- + val op || = Chars.orb + val op && = Chars.andb + val op >>> = Word32.>> + val op <<< = Word32.<< + val op &&& = Word32.andb + val op ||| = Word32.orb + val op & = Word.andb + + val max32 = Word32.notb 0wx0 + + (*--------------------------------------------------------------------*) + (* a char class is an array of words, interpreted as bitvectors. *) + (*--------------------------------------------------------------------*) + type MutableClass = Word32.word array + type CharClass = Word32.word vector + + (*--------------------------------------------------------------------*) + (* each word in a char class holds 32 entries. Thus the for a char c *) + (* is c div 32 == c >> 5. The bitmask is a word of zeros, only the *) + (* significant bit for c, i.e. the (c && 31==0x1F)th bit set to one. *) + (*--------------------------------------------------------------------*) + fun indexMask c = let val idx = Chars.toInt(c>>0w5) + val mask = 0wx1 <<< Char2Word c & 0w31 + in (idx,mask) + end + + (*--------------------------------------------------------------------*) + (* generate index and mask, then lookup. *) + (*--------------------------------------------------------------------*) + fun inCharClass(c,vec) = let val (idx,mask) = indexMask c + in mask &&& Vector.sub(vec,idx) <> 0wx0 + end + + (*--------------------------------------------------------------------*) + (* generate a CharClass large enough to hold (max-min+1) characters. *) + (*--------------------------------------------------------------------*) + fun initialize(min,max) = + Array.array((Chars.toInt max-Chars.toInt min+1) div 32+1,0wx0):MutableClass + fun finalize arr = Array.extract(arr,0,NONE) + + (*--------------------------------------------------------------------*) + (* add a single character to a CharClass. *) + (*--------------------------------------------------------------------*) + fun addChar(cls,min,max,c) = + let + val (idx,new) = indexMask c + val old = Array.sub(cls,idx) + in + Array.update(cls,idx,old|||new) + end + + (*--------------------------------------------------------------------*) + (* add a full range of characters to a CharClass. *) + (* this is the only function that computes the offset before access *) + (* to the array. *) + (*--------------------------------------------------------------------*) + fun addCharRange(cls,min,max,range) = (* returns intervals from range which are not between min and max *) + let + fun doOne (lo,hi) = + let + val (l,h) = (lo-min,hi-min) + val (idxL,idxH) = ((Chars.toInt l) div 32,(Chars.toInt h) div 32) + val (bitL,bitH) = (Char2Word l & 0w31,Char2Word h & 0w31) + in + if idxL=idxH then + let + val new = (max32>>>(0w31-bitH+bitL))<<>>(0w31-bitH) + val oldL = Array.sub(cls,idxL) + val oldH = Array.sub(cls,idxH) + val _ = Array.update(cls,idxL,oldL|||newL) + val _ = Array.update(cls,idxH,oldH|||newH) + val _ = UtilInt.appInterval (fn i => Array.update(cls,i,max32)) + (idxL+1,idxH-1) + in () + end + else () + end + fun doAll nil = nil + | doAll ((lh as (lo,hi))::lhs) = + if himax then lh::doAll lhs + else if lo=min andalso hi<=max + then (doOne lh; doAll lhs) + else if lo>=min andalso hi>max + then (doOne(lo,max); (max+0w1,hi)::lhs) + else (doOne(min,max); (max+0w1,hi)::lhs) + val _ = doAll range + in + doAll range + end + end + diff -uNr fxp-2.0.orig/src/Unicode/Chars/charClasses.sml fxp-2.0/src/Unicode/Chars/charClasses.sml --- fxp-2.0.orig/src/Unicode/Chars/charClasses.sml Sat Jun 26 02:42:55 2004 +++ fxp-2.0/src/Unicode/Chars/charClasses.sml Thu Nov 1 08:59:07 2007 @@ -91,7 +91,7 @@ (*--------------------------------------------------------------------*) fun initialize(min,max) = Array.array((Chars.toInt max-Chars.toInt min+1) div 32+1,0wx0):MutableClass - fun finalize arr = Array.extract(arr,0,NONE) + fun finalize arr = Array.vector arr (*--------------------------------------------------------------------*) (* add a single character to a CharClass. *) diff -uNr fxp-2.0.orig/src/Unicode/Chars/chars.cm fxp-2.0/src/Unicode/Chars/chars.cm --- fxp-2.0.orig/src/Unicode/Chars/chars.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/Chars/chars.cm Thu Nov 1 08:59:08 2007 @@ -0,0 +1,25 @@ +Group + structure DataDict + structure DataSymTab + signature UniChar + structure UniChar + signature UniClasses + structure UniClasses +is +#if (SMLNJ_MINOR_VERSION > 40) + charClasses.sml + uniChar.sml + $/basis.cm +#else + charClasses.orig.sml + uniChar.orig.sml +#endif + charVecDict.sml + dataDict.sml + testClasses.sml + uniClasses.sml + uniRanges.sml + + ../../Util/util.cm + ../../Util/SymDict/symdict.cm + diff -uNr fxp-2.0.orig/src/Unicode/Chars/uniChar.orig.sml fxp-2.0/src/Unicode/Chars/uniChar.orig.sml --- fxp-2.0.orig/src/Unicode/Chars/uniChar.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/Chars/uniChar.orig.sml Thu Nov 1 08:59:08 2007 @@ -0,0 +1,125 @@ +(*--------------------------------------------------------------------------*) +(* Structure: UniChar *) +(* *) +(* Depends on: *) +(* UtilString *) +(* *) +(* Exceptions raised by functions in this structure: *) +(*--------------------------------------------------------------------------*) +signature UniChar = + sig + structure Chars : WORD + + type Char = Chars.word + type Data = Char list + type Vector = Char vector + + val nullData : Data + val nullVector : Vector + + val hashChar : Char -> word + val hashData : Data -> word + val hashVector : Vector -> word + + val compareChar : Char * Char -> order + val compareData : Data * Data -> order + val compareVector : Vector * Vector -> order + + val char2Char : char -> Char + val Char2char : Char -> char + + val Char2Uni : Char -> string + val Char2String : Char -> string + + val String2Data : string -> Data + val Data2String : Data -> string + val Latin2String : Data -> string + + val Data2Vector : Data -> Vector + val Vector2Data : Vector -> Data + + val String2Vector : string -> Vector + val Vector2String : Vector -> string + + val quoteUni : Char -> string -> string + val quoteChar : Char -> Char -> string + val quoteData : Char -> Data -> string + val quoteVector : Char -> Vector -> string + end + +structure UniChar : UniChar = + struct + val O_VECTOR_PRINTLEN = 48 + + structure Chars = Word + + val _ = if Chars.wordSize > 21 then () + else let val str = ("UniChar: Chars.wordSize is too small.\n"^ + "Cannot compile on this system!\n" ) + val _ = print str + in raise Fail str + end + + type Char = Chars.word + type Data = Char list + + type CharInterval = Char * Char + type CharRange = CharInterval list + + type Vector = Char vector + + val nullChar = 0wx0:Char + val nullData = nil:Data + val nullVector = Vector.fromList nullData + + val hashChar = Word.fromLargeWord o Chars.toLargeWord + val hashData = UtilHash.hashList hashChar + val hashVector = UtilHash.hashVector hashChar + + val compareChar = Chars.compare + val compareData = UtilCompare.compareList compareChar + val compareVector = UtilCompare.compareVector compareChar + + val char2Char = Chars.fromLargeWord o Word8.toLargeWord o Byte.charToByte + val Char2char = Byte.byteToChar o Word8.fromLargeWord o Chars.toLargeWord + + fun Char2Uni c = + "U+"^UtilString.toUpperString(StringCvt.padLeft #"0" 4 (Chars.toString c)) + fun Char2String c = + case c + of 0wx9 => "\\t" + | 0wxA => "\\n" + | _ => if c<0wx100 then String.implode [Char2char c] + else Char2Uni c + + fun String2Data s = map char2Char (String.explode s) + fun Data2String cs = String.concat (map Char2String cs) + fun Latin2String cs = String.implode (map Char2char cs) + + val Data2Vector = Vector.fromList + fun String2Vector s = Vector.tabulate(String.size s,fn i => char2Char(String.sub(s,i))) + + fun Vector2Data vec = Vector.foldr (op ::) nil vec + fun Vector2String vec = + let + val maxlen = O_VECTOR_PRINTLEN + val len = Vector.length vec + in + if len<=maxlen orelse maxlen=0 + then Data2String (Vector2Data vec) + else let + val cs1 = Vector.foldri + (fn (_,c,cs) => c::cs) nil (vec,0,SOME (maxlen div 2)) + val cs2 = Vector.foldri + (fn (_,c,cs) => c::cs) nil (vec,len-3-maxlen div 2,NONE) + in Data2String cs1^"..."^Data2String cs2 + end + end + + fun quoteUni q s = let val sQ = Char2String q in sQ^s^sQ end + fun quoteChar q c = if c=0wx0 then "entity end" else quoteUni q (Char2String c) + fun quoteData q cs = quoteUni q (Data2String cs) + fun quoteVector q v = quoteUni q (Vector2String v) + end + + diff -uNr fxp-2.0.orig/src/Unicode/Chars/uniChar.sml fxp-2.0/src/Unicode/Chars/uniChar.sml --- fxp-2.0.orig/src/Unicode/Chars/uniChar.sml Sat Jun 26 02:42:55 2004 +++ fxp-2.0/src/Unicode/Chars/uniChar.sml Thu Nov 1 08:59:07 2007 @@ -108,10 +108,14 @@ if len<=maxlen orelse maxlen=0 then Data2String (Vector2Data vec) else let - val cs1 = Vector.foldri - (fn (_,c,cs) => c::cs) nil (vec,0,SOME (maxlen div 2)) - val cs2 = Vector.foldri - (fn (_,c,cs) => c::cs) nil (vec,len-3-maxlen div 2,NONE) + val cs1 = VectorSlice.foldri + (fn (_,c,cs) => c::cs) + nil + (VectorSlice.slice (vec,0,SOME (maxlen div 2))) + val cs2 = VectorSlice.foldri + (fn (_,c,cs) => c::cs) + nil + (VectorSlice.slice (vec,len-3-maxlen div 2,NONE)) in Data2String cs1^"..."^Data2String cs2 end end diff -uNr fxp-2.0.orig/src/Unicode/Decode/decode.cm fxp-2.0/src/Unicode/Decode/decode.cm --- fxp-2.0.orig/src/Unicode/Decode/decode.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/Decode/decode.cm Thu Nov 1 08:59:07 2007 @@ -0,0 +1,26 @@ +Group + signature Decode + structure Decode + signature DecodeFile + structure DecodeFile +is + decode.sml + decodeError.sml + decodeFile.sml + decodeMisc.sml + decodeUcs2.sml + decodeUcs4.sml + decodeUtf16.sml + decodeUtf8.sml + decodeUtil.sml + + ../Chars/chars.cm + ../Uri/uri.cm + ../../Util/util.cm + ../encoding.cm + ../../Util/SymDict/symdict.cm + +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif + diff -uNr fxp-2.0.orig/src/Unicode/Decode/decode.sml fxp-2.0/src/Unicode/Decode/decode.sml --- fxp-2.0.orig/src/Unicode/Decode/decode.sml Sat Jun 26 02:42:55 2004 +++ fxp-2.0/src/Unicode/Decode/decode.sml Thu Nov 1 15:53:30 2007 @@ -227,7 +227,7 @@ end handle EndOfFile f => (nil,f) - fun detect bs = + fun detect (bs : Word8.word list) = case bs of [0wx0,0wx0,0wxFE,0wxFF] => (UCS4B,nil) diff -uNr fxp-2.0.orig/src/Unicode/Encode/encode.cm fxp-2.0/src/Unicode/Encode/encode.cm --- fxp-2.0.orig/src/Unicode/Encode/encode.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/Encode/encode.cm Thu Nov 1 08:59:07 2007 @@ -0,0 +1,17 @@ +Group + signature Encode + structure Encode +is + encode.sml + encodeBasic.sml + encodeError.sml + encodeMisc.sml + + ../Chars/chars.cm + ../../Util/util.cm + ../encoding.cm + +#if (SMLNJ_MINOR_VERSION > 40) + $/basis.cm +#endif + diff -uNr fxp-2.0.orig/src/Unicode/Uri/uri.cm fxp-2.0/src/Unicode/Uri/uri.cm --- fxp-2.0.orig/src/Unicode/Uri/uri.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/Uri/uri.cm Thu Nov 1 08:59:06 2007 @@ -0,0 +1,22 @@ +Group + signature Uri + structure Uri + structure UriDict +is +#if (SMLNJ_MINOR_VERSION > 40) + uri.sml + uriEncode.sml + $/basis.cm +#else + uri.orig.sml + uriEncode.orig.sml +#endif + uriDecode.sml + uriDict.sml + + ../../Util/util.cm + ../Chars/chars.cm + ../../config.cm + + ../../Util/SymDict/symdict.cm + diff -uNr fxp-2.0.orig/src/Unicode/Uri/uri.orig.sml fxp-2.0/src/Unicode/Uri/uri.orig.sml --- fxp-2.0.orig/src/Unicode/Uri/uri.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/Uri/uri.orig.sml Thu Nov 1 08:59:06 2007 @@ -0,0 +1,197 @@ +(* +require "basis.__array"; +require "basis.__byte"; +require "basis.__string"; +require "basis.__vector"; +require "basis.__word"; +require "basis.__word8"; + +require "util.unsafe"; +require "util.utilInt"; + +require "chars"; +require "naming"; +*) +signature Uri = + sig + eqtype Uri + + val emptyUri : Uri + + val hashUri : Uri -> word + val compareUri : Uri * Uri -> order + + val uriJoin : Uri * Uri -> Uri + val uriSuffix : Uri -> string + + val Data2Uri : UniChar.Data -> Uri + val Vector2Uri : UniChar.Vector -> Uri + val String2Uri : string -> Uri + val Uri2String : Uri -> string + + val retrieveUri : Uri -> string * string * bool + end + +structure Uri :> Uri = + struct + open UniChar UniClasses UriDecode UriEncode UtilError UtilInt + + (*--------------------------------------------------------------------*) + (* decoding *) + (*--------------------------------------------------------------------*) + type Uri = string + + val emptyUri = "" + + val Vector2Uri = Vector2UriUtf8 + val Data2Uri = Data2UriUtf8 + val String2Uri = String2UriUtf8 + val Uri2String = decodeUriUtf8 + + val slash = "/" + + fun uriSuffix s = + let fun search i = if i<0 then NONE else case String.sub(s,i) + of #"." => SOME i + | #"/" => NONE + | _ => search (i-1) + in case search (String.size s-1) + of NONE => "" + | SOME i => String.extract(s,i+1,NONE) + end + + fun isScheme c = + Char.isAlphaNum c orelse #"+"=c orelse #"-"=c orelse #"."=c + + fun uriAbsolute uri = + let fun search i = + if i>=String.size uri then false + else let val c=String.sub(uri,i) + in if #":"=c then true else if isScheme c then search (i+1) + else false + end + in + if uri="" then false + else if Char.isAlpha (String.sub(uri,0)) then search 1 + else false + end + fun uriRelative uri = not (uriAbsolute uri) + + fun uriLocal uri = + if String.isPrefix "file:" uri + then SOME(String.extract(uri,5,NONE)) + else if uriRelative uri then SOME uri + else NONE + + fun uriPath s = + let + fun search (i,hadSlash) = + if i<0 then if hadSlash then SOME 0 else NONE + else case String.sub(s,i) + of #"/" => if hadSlash then NONE else search(i-1,true) + | _ => if hadSlash then SOME(i+1) else search(i-1,false) + val len = String.size s + val posOpt = search(len-1,false) + in case posOpt + of NONE => emptyUri + | SOME i => if i=0 then slash + else String.extract(s,0,SOME(i+1)) + end + + fun uriAuth uri = + let + fun searchScheme i = + if i>=String.size uri then NONE + else let val c=String.sub(uri,i) + in if #":"=c then SOME i else if isScheme c then searchScheme (i+1) + else NONE + end + fun searchSlash i = + if i>=String.size uri then NONE + else let val c=String.sub(uri,i) + in if #"/"=c then SOME i else searchSlash (i+1) + end + in + if uri="" then "" + else if not (Char.isAlpha(String.sub(uri,0))) then "" + else case searchScheme 1 + of NONE => "" + | SOME i => + if String.size uri<=i+2 then String.extract(uri,0,SOME(i+1)) + else if #"/"=String.sub(uri,i+1) andalso #"/"=String.sub(uri,i+2) + then case searchSlash (i+3) + of NONE => uri + | SOME j => String.extract(uri,0,SOME j) + else String.extract(uri,0,SOME(i+1)) + end + + fun uriScheme uri = + let + fun searchScheme i = + if i>=String.size uri then NONE + else let val c=String.sub(uri,i) + in if #":"=c then SOME i else if isScheme c then searchScheme (i+1) + else NONE + end + in + if uri="" then "" + else if not (Char.isAlpha(String.sub(uri,0))) then "" + else case searchScheme 1 + of NONE => "" + | SOME i => String.extract(uri,0,SOME(i+1)) + end + + fun uriJoin(abs,rel) = + if rel="" then uriPath abs + else if abs="" then rel + else if String.isPrefix "//" rel then uriScheme abs^rel + else if #"/"=String.sub(rel,0) then uriAuth abs^rel + else if uriAbsolute rel then rel + else uriPath abs^rel + + val compareUri = String.compare + val hashUri = UtilHash.hashString + + fun convertCommand str (src,dst) = + let + val s = Substring.all str + fun doit ss s = + if Substring.isEmpty s then ss + else let val (sl,sr) = Substring.splitr (fn c => #"%"<>c) s + in if Substring.isEmpty sl then sr::ss + else let val sl' = Substring.trimr 1 sl + in case Substring.first sr + of SOME #"1" => let val sr' = Substring.triml 1 sr + in doit (Substring.all src::sr'::ss) sl' + end + | SOME #"2" => let val sr' = Substring.triml 1 sr + in doit (Substring.all dst::sr'::ss) sl' + end + | _ => doit (Substring.all "%"::sr::ss) sl' + end + end + val ss = doit nil s + val s = Substring.concat ss + in s + end + + fun retrieveRemote uri = + let + val tmp = OS.FileSys.tmpName() + val cmd = convertCommand Config.retrieveCommand (uri,tmp) + val status = OS.Process.system cmd + val _ = if status = OS.Process.success then () + else let val _ = (OS.FileSys.remove tmp + handle OS.SysErr _ => ()) + val cmd = convertCommand + Config.retrieveCommand ("",tmp) + in raise NoSuchFile (uri,"command '"^cmd^"' failed") + end + in (Uri2String uri,tmp,true) + end + + fun retrieveUri uri = + case uriLocal uri + of SOME f => (Uri2String uri,Uri2String f,false) + | NONE => retrieveRemote uri + end diff -uNr fxp-2.0.orig/src/Unicode/Uri/uri.sml fxp-2.0/src/Unicode/Uri/uri.sml --- fxp-2.0.orig/src/Unicode/Uri/uri.sml Sat Jun 26 02:42:56 2004 +++ fxp-2.0/src/Unicode/Uri/uri.sml Thu Nov 1 08:59:06 2007 @@ -50,6 +50,8 @@ val slash = "/" + fun substringAll s = Substring.substring(s, 0, String.size s) + fun uriSuffix s = let fun search i = if i<0 then NONE else case String.sub(s,i) of #"." => SOME i @@ -154,7 +156,7 @@ fun convertCommand str (src,dst) = let - val s = Substring.all str + val s = substringAll str fun doit ss s = if Substring.isEmpty s then ss else let val (sl,sr) = Substring.splitr (fn c => #"%"<>c) s @@ -162,12 +164,12 @@ else let val sl' = Substring.trimr 1 sl in case Substring.first sr of SOME #"1" => let val sr' = Substring.triml 1 sr - in doit (Substring.all src::sr'::ss) sl' + in doit (substringAll src::sr'::ss) sl' end | SOME #"2" => let val sr' = Substring.triml 1 sr - in doit (Substring.all dst::sr'::ss) sl' + in doit (substringAll dst::sr'::ss) sl' end - | _ => doit (Substring.all "%"::sr::ss) sl' + | _ => doit (substringAll "%"::sr::ss) sl' end end val ss = doit nil s @@ -180,7 +182,7 @@ val tmp = OS.FileSys.tmpName() val cmd = convertCommand Config.retrieveCommand (uri,tmp) val status = OS.Process.system cmd - val _ = if status = OS.Process.success then () + val _ = if OS.Process.isSuccess status then () else let val _ = (OS.FileSys.remove tmp handle OS.SysErr _ => ()) val cmd = convertCommand diff -uNr fxp-2.0.orig/src/Unicode/Uri/uriEncode.orig.sml fxp-2.0/src/Unicode/Uri/uriEncode.orig.sml --- fxp-2.0.orig/src/Unicode/Uri/uriEncode.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/Uri/uriEncode.orig.sml Thu Nov 1 08:59:06 2007 @@ -0,0 +1,98 @@ +signature UriEncode = + sig + val Data2UriUtf8 : UniChar.Data -> string + val Data2UriLatin : UniChar.Data -> string + + val Vector2UriUtf8 : UniChar.Vector -> string + val Vector2UriLatin : UniChar.Vector -> string + + val String2UriUtf8 : string -> string + val String2UriLatin : string -> string + end + +structure UriEncode : UriEncode = + struct + + open UniChar UniClasses + + infix 8 >> >>> + infix 7 && &&& + infix 6 || + + val op && = Word8.andb + val op &&& = Chars.andb + val op >> = Word8.>> + val op >>> = Chars.>> + val op || = Word8.orb + + val Char2Byte = Word8.fromLargeWord o Chars.toLargeWord + + fun encodeCharUtf8 c = + if c<0wx80 then [Char2Byte c] + else if c<0wx800 + then [0wxC0 || Char2Byte(c >>> 0w6), + 0wx80 || Char2Byte(c &&& 0wx3F)] + else if c<0wx10000 + then [0wxE0 || Char2Byte(c >>> 0w12), + 0wx80 || Char2Byte((c >>> 0w6) &&& 0wx3F), + 0wx80 || Char2Byte(c &&& 0wx3F)] + else if c<0wx200000 + then [0wxF0 || Char2Byte(c >>> 0w18), + 0wx80 || Char2Byte((c >>> 0w12) &&& 0wx3F), + 0wx80 || Char2Byte((c >>> 0w6) &&& 0wx3F), + 0wx80 || Char2Byte(c &&& 0wx3F)] + else if c<0wx4000000 + then [0wxF8 || Char2Byte(c >>> 0w24), + 0wx80 || Char2Byte((c >>> 0w18) &&& 0wx3F), + 0wx80 || Char2Byte((c >>> 0w12) &&& 0wx3F), + 0wx80 || Char2Byte((c >>> 0w6) &&& 0wx3F), + 0wx80 || Char2Byte(c &&& 0wx3F)] + else [0wxFC || Char2Byte(c >>> 0w30), + 0wx80 || Char2Byte((c >>> 0w24) &&& 0wx3F), + 0wx80 || Char2Byte((c >>> 0w18) &&& 0wx3F), + 0wx80 || Char2Byte((c >>> 0w12) &&& 0wx3F), + 0wx80 || Char2Byte((c >>> 0w6) &&& 0wx3F), + 0wx80 || Char2Byte(c &&& 0wx3F)] + + fun Byte2Cc b = + let fun Quad2C b = if b<0wxA then Byte.byteToChar(b+0wx30) else Byte.byteToChar(b+0wx37) + in (Quad2C(b >> 0w4),Quad2C(b && 0wx0F)) + end + + fun precedesHex (i,cv) = + if Vector.length cv <= i+2 then false + else let val (c1,c2) = (Vector.sub(cv,i+1),Vector.sub(cv,i+2)) + in isHex c1 andalso isHex c2 + end + + fun Vector2UriUtf8 cv = + let val revd = Vector.foldli + (fn (i,c,s) => if c<0wx80 andalso (c<>0wx25 orelse precedesHex(i,cv)) + then Char2char c::s + else foldl (fn (b,s) => let val (c1,c2) = Byte2Cc b + in c2::c1:: #"%"::s + end) + s (encodeCharUtf8 c)) + nil (cv,0,NONE) + in String.implode (rev revd) + end + + fun Vector2UriLatin cv = + let val revd = Vector.foldli + (fn (i,c,s) => if c<0wx80 andalso (c<>0wx25 orelse precedesHex(i,cv)) + then Char2char c::s + else (if c>= 0w100 then s + else let val (c1,c2) = Byte2Cc (Char2Byte c) + in c2::c1:: #"%"::s + end)) + nil (cv,0,NONE) + in String.implode (rev revd) + end + + val Data2UriUtf8 = Vector2UriUtf8 o Data2Vector + val Data2UriLatin = Vector2UriLatin o Data2Vector + + val String2UriUtf8 = Vector2UriUtf8 o String2Vector + val String2UriLatin = Vector2UriLatin o String2Vector + end + diff -uNr fxp-2.0.orig/src/Unicode/Uri/uriEncode.sml fxp-2.0/src/Unicode/Uri/uriEncode.sml --- fxp-2.0.orig/src/Unicode/Uri/uriEncode.sml Sat Jun 26 02:42:56 2004 +++ fxp-2.0/src/Unicode/Uri/uriEncode.sml Thu Nov 1 08:59:06 2007 @@ -73,7 +73,7 @@ in c2::c1:: #"%"::s end) s (encodeCharUtf8 c)) - nil (cv,0,NONE) + nil cv in String.implode (rev revd) end @@ -85,7 +85,7 @@ else let val (c1,c2) = Byte2Cc (Char2Byte c) in c2::c1:: #"%"::s end)) - nil (cv,0,NONE) + nil cv in String.implode (rev revd) end diff -uNr fxp-2.0.orig/src/Unicode/encoding.cm fxp-2.0/src/Unicode/encoding.cm --- fxp-2.0.orig/src/Unicode/encoding.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/encoding.cm Thu Nov 1 08:59:08 2007 @@ -0,0 +1,8 @@ +Group + signature Encoding + structure Encoding +is + encoding.sml + + ../Util/SymDict/symdict.cm + diff -uNr fxp-2.0.orig/src/Unicode/unicode.cm fxp-2.0/src/Unicode/unicode.cm --- fxp-2.0.orig/src/Unicode/unicode.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Unicode/unicode.cm Thu Nov 1 08:59:08 2007 @@ -0,0 +1,25 @@ +Group + signature Encoding + structure Encoding + signature Encode + structure Encode + signature Uri + structure Uri + structure UriDict + signature Decode + structure Decode + signature DecodeFile + structure DecodeFile + structure DataDict + structure DataSymTab + signature UniChar + structure UniChar + signature UniClasses + structure UniClasses +is + encoding.cm + Chars/chars.cm + Decode/decode.cm + Encode/encode.cm + Uri/uri.cm + diff -uNr fxp-2.0.orig/src/Util/SymDict/dict.orig.sml fxp-2.0/src/Util/SymDict/dict.orig.sml --- fxp-2.0.orig/src/Util/SymDict/dict.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Util/SymDict/dict.orig.sml Thu Nov 1 08:58:59 2007 @@ -0,0 +1,323 @@ +(*--------------------------------------------------------------------------*) +(* Functor: Dict *) +(* *) +(* Depends on: *) +(* Chars *) +(* *) +(* Exceptions raised by functions in this functor: *) +(* addByIndex : NoSuchIndex *) +(* addByKey : InternalError *) +(* getByIndex : NoSuchIndex *) +(* getByKey : InternalError *) +(* getIndex : InternalError *) +(* getKey : NoSuchIndex *) +(* hasIndex : none *) +(* makeDict : none *) +(* nullDict : none *) +(* printDict : none *) +(* usedIndices : none *) +(*--------------------------------------------------------------------------*) +(* A dictionary maps keys to consecutive integers and additionally holds *) +(* a value of arbitrary type for each entry. *) +(*--------------------------------------------------------------------------*) +signature Dict = + sig + type Key + type 'a Dict + + exception NoSuchIndex + + val nullDict : string * 'a -> 'a Dict + val makeDict : string * int * 'a -> 'a Dict + val clearDict : 'a Dict * int option -> unit + + val hasIndex : 'a Dict * Key -> int option + val getIndex : 'a Dict * Key -> int + val getKey : 'a Dict * int -> Key + + val getByIndex : 'a Dict * int -> 'a + val getByKey : 'a Dict * Key -> 'a + + val setByIndex : 'a Dict * int * 'a -> unit + val setByKey : 'a Dict * Key * 'a -> unit + + val usedIndices : 'a Dict -> int + + val extractDict : 'a Dict -> (Key * 'a) array + val printDict : ('a -> string) -> 'a Dict -> unit + end + +functor Dict (structure Key : Key) : Dict = + struct + open UtilError UtilInt + + type Key = Key.Key + + exception NoSuchIndex + + (*--------------------------------------------------------------------*) + (* a dictionary can have at most size MAX_WIDTH. This is because *) + (* arrays may at most have Array.maxLen elements. We only use powers *) + (* of two as sizes, so we are really only interested in the position *) + (* of maxLen's highest bit. That would be the maximal width for hash *) + (* tables, and thus we must decrease it by one for obtaining the max *) + (* table width. *) + (*--------------------------------------------------------------------*) + fun highestBit w = if w=0w0 then 0 else 1+highestBit(Word.>>(w,0w1)) + val MAX_WIDTH = highestBit (Word.fromInt Array.maxLen)-1 + + type Bucket = (Key * int) list + val nullBucket = nil : Bucket + + (*--------------------------------------------------------------------*) + (* buckets are unsorted - they are probably small, so comparing the *) + (* keys might be overkill. *) + (*--------------------------------------------------------------------*) + fun addToBucket (ni as (key,_),bucket) = + let + fun doit nil = [ni] + | doit (nis as (ni' as (key',_))::rest) = + case Key.compare (key',key) + of LESS => ni'::doit rest + | EQUAL => ni::rest + | GREATER => ni::nis + in + doit bucket + end + fun searchBucket (key,bucket) = + let + fun doit nil = NONE + | doit ((key',i)::rest) = + case Key.compare (key',key) + of LESS => doit rest + | EQUAL => SOME i + | GREATER => NONE + in + doit bucket + end + + (*--------------------------------------------------------------------*) + (* a dictionary consists of *) + (* - a string desc saying what is stored in this dictionary *) + (* - an array tab holding for each index its key and value *) + (* - a hash table, i.e. Bucket array, of double size than tab *) + (* - a hashFun mapping Key to the range of the hash table *) + (* - an integer width for computing table sizes *) + (* - an integer size wich is the size of the value table *) + (* - an integer count holding the next free index *) + (* - a default value for the value table *) + (*--------------------------------------------------------------------*) + type 'a Dict = {desc : string, + tab : (Key * 'a) array ref, + hashTab : Bucket array ref, + hashFun : (Key -> int) ref, + width : int ref, (* bit width *) + size : int ref, (* tab size=2^width, hash size is double *) + count : int ref, (* number of entries *) + def : 'a (* default for values *) + } + fun nullDict (desc,def) = {desc = desc, + tab = ref (Array.array(1,(Key.null,def))), + hashTab = ref (Array.array(2,nullBucket)), + hashFun = ref (fn _ => 0), + count = ref 0, + size = ref 1, + width = ref 0, + def = def} + + (*--------------------------------------------------------------------*) + (* how many entries are in the dictionary? *) + (*--------------------------------------------------------------------*) + fun usedIndices ({count,...}:'a Dict) = !count + + (*--------------------------------------------------------------------*) + (* what is the table load, i.e. percentage of number of entries to *) + (* hash table size = 100*count/(2*size) = 50*count/size. *) + (*--------------------------------------------------------------------*) + fun hashRatio({count,size,...}:'a Dict) = 50 * !count div !size + handle Div => 100 + + (*--------------------------------------------------------------------*) + (* this is the hash function. Key.hash hashes data to arbitrary *) + (* words, that are mapped to the hash range by this function, where *) + (* mask is the bitmask corresponding to the size of the hash table: *) + (* 1. square the word produced by Key.hash *) + (* 2. take the width bits from the middle of the square, these are *) + (* the bit-places influenced by all input bit-places: *) + (* - shift to the right by half of the destination width *) + (* - mask out all bits to the left of destination *) + (* this is a simple strategy but experiences good results. *) + (*--------------------------------------------------------------------*) + fun square (x:word) = Word.*(x,x) + fun hashKey(half,mask) x = + Word.toInt(Word.andb(mask,Word.>>(square(Key.hash x),half))) + fun makeHashFun(size,width) = + let + val mask = 0w2*Word.fromInt size-0w1 + val half = Word.fromInt((width+1) div 2) + in + hashKey(half,mask) + end + + (*--------------------------------------------------------------------*) + (* create a new dictionary for 2^w, but at least 2 and at most 2^m *) + (* entries, where m is the value of MAX_WIDTH. *) + (*--------------------------------------------------------------------*) + fun makeDict (desc,w,def) = + let + val width= Int.min(Int.max(1,w),MAX_WIDTH) + val size = Word.toInt(Word.<<(0w1,Word.fromInt(width-1))) + in {desc = desc, + tab = ref (Array.array(size,(Key.null,def))), + hashTab = ref (Array.array(2*size,nullBucket)), + hashFun = ref (makeHashFun(size,width)), + width = ref width, + size = ref size, + count = ref 0, + def = def} + end + + (*--------------------------------------------------------------------*) + (* clear a dictionary. If the 2nd arg is SOME w, use w for resizing. *) + (*--------------------------------------------------------------------*) + fun clearDict (dict:'a Dict,widthOpt) = + case widthOpt + of NONE => + let + val {tab=ref tab,hashTab=ref hashTab,size,count,def,...} = dict + val _ = appInterval (fn i => Array.update(tab,i,(Key.null,def))) (0,!count-1) + val _ = appInterval (fn i => Array.update(hashTab,i,nullBucket)) (0,!size*2-1) + in + count := 0 + end + | SOME w => + let + val {tab,hashTab,hashFun,width,size,count,def,...} = dict + val newWidth = Int.min(Int.max(1,w),MAX_WIDTH) + val newSize = Word.toInt(Word.<<(0w1,Word.fromInt(newWidth-1))) + val _ = tab := (Array.array(newSize,(Key.null,def))) + val _ = hashTab := (Array.array(2*newSize,nullBucket)) + val _ = hashFun := (makeHashFun(newSize,newWidth)) + val _ = width := newWidth + val _ = size := newSize + in + count := 0 + end + + (*--------------------------------------------------------------------*) + (* grow a dictionary to the double size. raise InternalError if the *) + (* dictionary already has maximal size. *) + (*--------------------------------------------------------------------*) + fun growDictionary ({desc,tab,hashTab,hashFun,width,size,count,def}:'a Dict) = + let + val oldTab = !tab + val _ = if !width < MAX_WIDTH then width := !width+1 + else raise InternalError + ("Dict","growDictionary", + String.concat ["growing the ",desc," dictionary ", + "exceeded the system maximum size of ", + Int.toString Array.maxLen," for arrays"]) + val _ = size := !size*2 + val _ = tab := Array.array(!size,(Key.null,def)) + val _ = hashTab := Array.array(!size*2,nullBucket) + val _ = hashFun := makeHashFun(!size,!width) + + fun addTo (i,kv as (key,_)) = + let + val idx = !hashFun key + val _ = Array.update(!hashTab,idx,addToBucket((key,i),Array.sub(!hashTab,idx))) + val _ = Array.update(!tab,i,kv) + in () + end + in + Array.appi addTo (oldTab,0,NONE) + end + + (*--------------------------------------------------------------------*) + (* lookup the key for an index of the dictionary. *) + (*--------------------------------------------------------------------*) + fun getKey({tab,count,...}:'a Dict,idx) = + if !count>idx then #1(Array.sub(!tab,idx)) + else raise NoSuchIndex + + (*--------------------------------------------------------------------*) + (* map a Key to its index in the dictionary. if it is not in the *) + (* dictionary yet, add a new entry with a new index. grow the table *) + (* if there is no more free index in the dictionary. *) + (*--------------------------------------------------------------------*) + fun getIndex(dict as {tab,hashTab,hashFun,size,count,def,...}:'a Dict,key) = + let + val k = !hashFun key + val bucket = Array.sub(!hashTab,k) + in + case searchBucket(key,bucket) + of SOME idx => idx + | NONE => let val idx = !count + val (k',buck') = if !size>idx then (k,bucket) + else let val _ = growDictionary dict + val k' = !hashFun key + val buck' = Array.sub(!hashTab,k') + in (k',buck') + end + val _ = Array.update(!hashTab,k',addToBucket((key,idx),buck')) + val _ = Array.update(!tab,idx,(key,def)) + val _ = count := idx+1 + in idx + end + end + + (*--------------------------------------------------------------------*) + (* does a Key have an entry in a dictionary? *) + (*--------------------------------------------------------------------*) + fun hasIndex({hashTab,hashFun,...}:'a Dict,key) = + let + val idx = !hashFun key + val bucket = Array.sub(!hashTab,idx) + in + searchBucket(key,bucket) + end + + (*--------------------------------------------------------------------*) + (* get the value stored for index idx *) + (*--------------------------------------------------------------------*) + fun getByIndex({tab,count,...}:'a Dict,idx) = + if !count>idx then #2(Array.sub(!tab,idx)) + else raise NoSuchIndex + + (*--------------------------------------------------------------------*) + (* get the value stored for a key *) + (*--------------------------------------------------------------------*) + fun getByKey(dict,key) = + getByIndex(dict,getIndex(dict,key)) + + (*--------------------------------------------------------------------*) + (* enter a value for index idx. *) + (*--------------------------------------------------------------------*) + fun setByIndex({tab,count,...}:'a Dict,idx,a) = + if !count>idx then let val (key,_) = Array.sub(!tab,idx) + in Array.update(!tab,idx,(key,a)) + end + else raise NoSuchIndex + + (*--------------------------------------------------------------------*) + (* enter a value for a key. *) + (*--------------------------------------------------------------------*) + fun setByKey(dict,key,v) = + setByIndex(dict,getIndex(dict,key),v) + + (*--------------------------------------------------------------------*) + (* extract the contents of the dictionary to an array. *) + (*--------------------------------------------------------------------*) + fun extractDict({count,tab,...}:'a Dict) = + Array.tabulate(!count,fn i => Array.sub(!tab,i)) + + (*--------------------------------------------------------------------*) + (* print the contents of the dictionary. *) + (*--------------------------------------------------------------------*) + fun printDict X2String ({desc,tab,count,...}:'a Dict) = + (print (desc^" dictionary:\n"); + Array.appi + (fn (n,(key,value)) => + print (" "^Int.toString n^": "^Key.toString key^" = "^X2String value^"\n")) + (!tab,0,SOME (!count))) + end diff -uNr fxp-2.0.orig/src/Util/SymDict/dict.sml fxp-2.0/src/Util/SymDict/dict.sml --- fxp-2.0.orig/src/Util/SymDict/dict.sml Sat Jun 26 02:42:56 2004 +++ fxp-2.0/src/Util/SymDict/dict.sml Thu Nov 1 08:58:59 2007 @@ -230,7 +230,7 @@ in () end in - Array.appi addTo (oldTab,0,NONE) + Array.appi addTo oldTab end (*--------------------------------------------------------------------*) @@ -316,8 +316,8 @@ (*--------------------------------------------------------------------*) fun printDict X2String ({desc,tab,count,...}:'a Dict) = (print (desc^" dictionary:\n"); - Array.appi + ArraySlice.appi (fn (n,(key,value)) => print (" "^Int.toString n^": "^Key.toString key^" = "^X2String value^"\n")) - (!tab,0,SOME (!count))) + (ArraySlice.slice(!tab,0,SOME (!count)))) end diff -uNr fxp-2.0.orig/src/Util/SymDict/symbolTable.orig.sml fxp-2.0/src/Util/SymDict/symbolTable.orig.sml --- fxp-2.0.orig/src/Util/SymDict/symbolTable.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Util/SymDict/symbolTable.orig.sml Thu Nov 1 08:58:59 2007 @@ -0,0 +1,314 @@ + + + + + + +(*--------------------------------------------------------------------------*) +(* Functor: SymbolTable *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* getSymIndex : Key.InternalError *) +(* getSymKey : NoSuchSymbol *) +(* hasSymIndex : none *) +(* makeSymTable : none *) +(* nullSymTable : none *) +(* printSymTable : none *) +(* usedSymbols : none *) +(*--------------------------------------------------------------------------*) +(* A symbol table maps Keys to consecutive integers. *) +(*--------------------------------------------------------------------------*) +signature SymTable = + sig + type Key + type SymTable + + exception NoSuchSymbol + + val nullSymTable : string -> SymTable + val makeSymTable : string * int -> SymTable + val clearSymTable : SymTable * int option -> unit + + val hasSymIndex : SymTable * Key -> int option + val getSymIndex : SymTable * Key -> int + val getSymKey : SymTable * int -> Key + val usedSymbols : SymTable -> int + + val assignSymIndex : SymTable * Key * int -> unit + val reserveSymIndex : SymTable -> int + + val extractSymTable : SymTable -> Key vector + val printSymTable : SymTable -> unit + end + +functor SymTable (structure Key : Key) : SymTable = + struct + open UtilError UtilInt + + exception NoSuchSymbol + + type Key = Key.Key + + (*--------------------------------------------------------------------*) + (* a symbol table can have at most size MAX_WIDTH. This is because *) + (* arrays may at most have Array.maxLen elements. We only use powers *) + (* of two as sizes, so we are really only interested in the position *) + (* of maxLen's highest bit. That would be the maximal width for hash *) + (* tables, and thus we must decrease it by one for obtaining the max *) + (* table width. *) + (*--------------------------------------------------------------------*) + fun highestBit w = if w=0w0 then 0 else 1+highestBit(Word.>>(w,0w1)) + val MAX_WIDTH = highestBit (Word.fromInt Array.maxLen)-1 + + type Bucket = (Key * int) list + val nullBucket = nil : Bucket + + (*--------------------------------------------------------------------*) + (* buckets are sorted - though they are probably small. *) + (*--------------------------------------------------------------------*) + fun addToBucket (ni as (key,_),bucket) = + let + fun doit nil = [ni] + | doit (nis as (ni' as (key',_))::rest) = + case Key.compare (key',key) + of LESS => ni'::doit rest + | EQUAL => ni::rest + | GREATER => ni::nis + in + doit bucket + end + fun searchBucket (key,bucket) = + let + fun doit nil = NONE + | doit ((key',i)::rest) = + case Key.compare (key',key) + of LESS => doit rest + | EQUAL => SOME i + | GREATER => NONE + in + doit bucket + end + + (*--------------------------------------------------------------------*) + (* a symbol table consists of *) + (* - an array tab holding for each index its key *) + (* - a hash table, i.e. Bucket array, of double size than tab *) + (* - a hashFun mapping Key to the range of the hash table *) + (* - an integer width for computing table sizes *) + (* - an integer size wich is the size of the value table *) + (* - an integer count holding the next free index *) + (*--------------------------------------------------------------------*) + type SymTable = {desc : string, + tab : Key array ref, + hash : Bucket array ref, + hashFun : (Key -> int) ref, + width : int ref, (* bit width *) + size : int ref, (* tab size=2^width, hash size is double *) + count : int ref (* number of entries *) + } + + fun nullSymTable desc = {desc = desc, + tab = ref (Array.array(1,Key.null)), + hash = ref (Array.array(2,nullBucket)), + hashFun = ref (fn _ => 0), + count = ref 0, + size = ref 1, + width = ref 0} : SymTable + + (*--------------------------------------------------------------------*) + (* how many entries are in the symtable? *) + (*--------------------------------------------------------------------*) + fun usedSymbols ({count,...}:SymTable) = !count + + (*--------------------------------------------------------------------*) + (* what is the table load, i.e. percentage of number of entries to *) + (* hash table size = 100*count/(2*size) = 50*count/size. *) + (*--------------------------------------------------------------------*) + fun hashRatio({count,size,...}:SymTable) = 50 * !count div !size + handle Div => 100 + + (*--------------------------------------------------------------------*) + (* this is the hash function. Key.hash hashes data to arbitrary *) + (* words, that are mapped to the hash range by this function, where *) + (* mask is the bitmask corresponding to the size of the hash table: *) + (* 1. square the word produced by Key.hash *) + (* 2. take the width bits from the middle of the square, these are *) + (* the bit-places influenced by all input bit-places: *) + (* - shift to the right by half of the destination width *) + (* - mask out all bits to the left of destination *) + (* this is a simple strategy but experiences good results. *) + (*--------------------------------------------------------------------*) + fun square (x:word) = Word.*(x,x) + fun hashKey(half,mask) x = + Word.toInt(Word.andb(mask,Word.>>(square(Key.hash x),half))) + fun makeHashFun(size,width) = + let + val mask = Word.fromInt(2*size-1) + val half = Word.fromInt((width+1) div 2) + in + hashKey(half,mask) + end + + (*--------------------------------------------------------------------*) + (* create a new symtable for 2^w, but at least 2 and at most 2^m *) + (* entries, where m is the value of MAX_WIDTH. *) + (*--------------------------------------------------------------------*) + fun makeSymTable (desc,w) = + let + val width= Int.min(Int.max(1,w),MAX_WIDTH) + val size = Word.toInt(Word.<<(0w1,Word.fromInt(width-1))) + in {desc = desc, + tab = ref (Array.array(size,Key.null)), + hash = ref (Array.array(2*size,nullBucket)), + hashFun = ref (makeHashFun(size,width)), + width = ref width, + size = ref size, + count = ref 0} + end + + (*--------------------------------------------------------------------*) + (* clear a dictionary. If the 2nd arg is SOME w, use w for resizing. *) + (*--------------------------------------------------------------------*) + fun clearSymTable (symTab:SymTable,widthOpt) = + case widthOpt + of NONE => + let + val {tab=ref tab,hash=ref hash,size,count,...} = symTab + val _ = appInterval (fn i => Array.update(tab,i,Key.null)) (0,!count-1) + val _ = appInterval (fn i => Array.update(hash,i,nullBucket)) (0,!size*2-1) + in + count := 0 + end + | SOME w => + let + val {tab,hash,hashFun,width,size,count,...} = symTab + val newWidth = Int.min(Int.max(1,w),MAX_WIDTH) + val newSize = Word.toInt(Word.<<(0w1,Word.fromInt(newWidth-1))) + val _ = tab := (Array.array(newSize,Key.null)) + val _ = hash := (Array.array(2*newSize,nullBucket)) + val _ = hashFun := (makeHashFun(newSize,newWidth)) + val _ = width := newWidth + val _ = size := newSize + in + count := 0 + end + + (*--------------------------------------------------------------------*) + (* grow a symtable to the double size. raise InternalError if the *) + (* table already has maximal size. *) + (*--------------------------------------------------------------------*) + fun growTable ({desc,tab,hash,hashFun,width,size,count}:SymTable) = + let + val newWidth = if !width < MAX_WIDTH then !width+1 + else raise InternalError + ("SymTable","growTable", + String.concat ["growing the ",desc," symbol table ", + "exceeded the system maximum size of ", + Int.toString Array.maxLen," for arrays"]) + val newSize = !size*2 + + val oldTab = !tab + val newTab = Array.array(newSize,Key.null) + val newHash = Array.array(2*newSize,nullBucket) + val newHashFun = makeHashFun(newSize,newWidth) + + fun addToNew (inv as (i,key)) = + let + val idx = newHashFun key + val _ = Array.update(newHash,idx,addToBucket((key,i),Array.sub(newHash,idx))) + val _ = Array.update(newTab,i,key) + in () + end + val _ = Array.appi addToNew (!tab,0,NONE) + + val _ = tab := newTab + val _ = hash := newHash + val _ = size := newSize + val _ = width := newWidth + val _ = hashFun := newHashFun + in () + end + + (*--------------------------------------------------------------------*) + (* lookup the key for an index of the symbol table. *) + (*--------------------------------------------------------------------*) + fun getSymKey({tab,count,...}:SymTable,idx) = + if !count>idx then Array.sub(!tab,idx) + else raise NoSuchSymbol + + (*--------------------------------------------------------------------*) + (* map a Key to its index in the symbol table. if it is not in the *) + (* symbol table yet, add a new entry with a new index. grow the table *) + (* if there is no more free index in the table. *) + (*--------------------------------------------------------------------*) + fun getSymIndex(st as {tab,hash,hashFun,size,count,...}:SymTable,key) = + let + val idx = !hashFun key + val bucket = Array.sub(!hash,idx) + in + case searchBucket(key,bucket) + of SOME i => i + | NONE => let val i = !count + val (idx',buck') = if !size>i then (idx,bucket) + else let val _ = growTable st + val idx' = !hashFun key + val buck' = Array.sub(!hash,idx') + in (idx',buck') + end + val _ = Array.update(!hash,idx',addToBucket((key,i),buck')) + val _ = Array.update(!tab,i,key) + val _ = count := i+1 + in i + end + end + + (*--------------------------------------------------------------------*) + (* does a Key have an entry in a symbol table? *) + (*--------------------------------------------------------------------*) + fun hasSymIndex({hash,hashFun,...}:SymTable,key) = + let + val idx = !hashFun key + val buck = Array.sub(!hash,idx) + in + searchBucket(key,buck) + end + + (*--------------------------------------------------------------------*) + (* reserve an index for a (yet unknown) key. *) + (*--------------------------------------------------------------------*) + fun reserveSymIndex(st as {size,count=count as ref i,...}:SymTable) = + let + val _ = if !size>i then () else growTable st + val _ = count := i+1 + in i + end + + (*--------------------------------------------------------------------*) + (* assign an index to a (previously reserved) index. *) + (*--------------------------------------------------------------------*) + fun assignSymIndex(st as {count,hash,hashFun,tab,...}:SymTable,key,i) = + if !count<=i then raise NoSuchSymbol + else let val idx = !hashFun key + val buck = Array.sub(!hash,idx) + val newBuck = addToBucket((key,i),buck) + val _ = Array.update(!hash,idx,newBuck) + val _ = Array.update(!tab,i,key) + in () + end + + (*--------------------------------------------------------------------*) + (* extract the contents of a symbol table to a vector. *) + (*--------------------------------------------------------------------*) + fun extractSymTable({count,tab,...}:SymTable) = + Array.extract(!tab,0,SOME(!count)) + + (*--------------------------------------------------------------------*) + (* print the contents of the symbol table. *) + (*--------------------------------------------------------------------*) + fun printSymTable ({desc,tab,count,...}:SymTable) = + (print (desc^" table:\n"); + Array.appi + (fn (n,key) => + print (" "^Int.toString n^": "^Key.toString key^"\n")) + (!tab,0,SOME (!count))) + end diff -uNr fxp-2.0.orig/src/Util/SymDict/symbolTable.sml fxp-2.0/src/Util/SymDict/symbolTable.sml --- fxp-2.0.orig/src/Util/SymDict/symbolTable.sml Sat Jun 26 02:42:56 2004 +++ fxp-2.0/src/Util/SymDict/symbolTable.sml Thu Nov 1 08:58:59 2007 @@ -219,7 +219,7 @@ val _ = Array.update(newTab,i,key) in () end - val _ = Array.appi addToNew (!tab,0,NONE) + val _ = Array.appi addToNew (!tab) val _ = tab := newTab val _ = hash := newHash @@ -300,15 +300,15 @@ (* extract the contents of a symbol table to a vector. *) (*--------------------------------------------------------------------*) fun extractSymTable({count,tab,...}:SymTable) = - Array.extract(!tab,0,SOME(!count)) + ArraySlice.vector(ArraySlice.slice(!tab,0,SOME(!count))) (*--------------------------------------------------------------------*) (* print the contents of the symbol table. *) (*--------------------------------------------------------------------*) fun printSymTable ({desc,tab,count,...}:SymTable) = (print (desc^" table:\n"); - Array.appi + ArraySlice.appi (fn (n,key) => print (" "^Int.toString n^": "^Key.toString key^"\n")) - (!tab,0,SOME (!count))) + (ArraySlice.slice(!tab,0,SOME (!count)))) end diff -uNr fxp-2.0.orig/src/Util/SymDict/symdict.cm fxp-2.0/src/Util/SymDict/symdict.cm --- fxp-2.0.orig/src/Util/SymDict/symdict.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Util/SymDict/symdict.cm Thu Nov 1 08:58:59 2007 @@ -0,0 +1,30 @@ +Group + signature Dict + functor Dict + structure IntListDict + structure IntListSymTab + structure KeyIntSet + structure IntSetDict + signature Key + structure StringDict + signature SymTable + functor SymTable +is + +#if (SMLNJ_MINOR_VERSION > 40) + dict.sml + symbolTable.sml + $/basis.cm +#else + dict.orig.sml + symbolTable.orig.sml +#endif + + intDict.sml + intListDict.sml + intSetDict.sml + key.sml + stringDict.sml + + ../util.cm + diff -uNr fxp-2.0.orig/src/Util/intSets.orig.sml fxp-2.0/src/Util/intSets.orig.sml --- fxp-2.0.orig/src/Util/intSets.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Util/intSets.orig.sml Thu Nov 1 08:59:00 2007 @@ -0,0 +1,173 @@ + + + + + + + +signature IntSets = + sig + eqtype IntSet + + val emptyIntSet : IntSet + val singleIntSet : int -> IntSet + val fullIntSet : int -> IntSet + + val isEmptyIntSet : IntSet -> bool + val inIntSet : int * IntSet -> bool + + val compareIntSets: IntSet * IntSet -> order + val hashIntSet : IntSet -> word + + val addIntSet : int * IntSet -> IntSet + val delIntSet : int * IntSet -> IntSet + + val cupIntSets : IntSet * IntSet -> IntSet + val capIntSets : IntSet * IntSet -> IntSet + val diffIntSets : IntSet * IntSet -> IntSet + + val IntSet2List : IntSet -> int list + val IntList2Set : int list -> IntSet + end + +structure IntSets : IntSets = + struct + structure W = Word32 + val wordSize = W.wordSize + + type IntSet = W.word vector + + infix 7 << >> + infix 6 && + infix 5 || + + val op >> = W.>> + val op << = W.<< + val op && = W.andb + val op || = W.orb + val !! = W.notb + + fun normalize (vec:IntSet) = + let val max = Vector.foldli + (fn (i,w,max) => if w=0wx0 then i else max) 0 (vec,0,NONE) + in Vector.extract (vec,0,SOME max) + end + + val emptyIntSet = Vector.fromList nil : IntSet + + fun fullIntSet n = let val size = (n+wordSize-1) div wordSize + val full = 0w0-0w1:W.word + val bits = (n-1) mod wordSize+1 + val last = full >> (Word.fromInt (wordSize-bits)) + in Vector.tabulate(n div wordSize+1, + fn i => if i if i=idx then mask else 0w0):IntSet + end + + fun isEmptyIntSet vec = Vector.length vec=0 + + fun inIntSet(n,vec) = + let val idx = n div wordSize + in if idx>=Vector.length vec then false + else let val mask = 0w1 << (Word.fromInt (n mod wordSize)) + in Vector.sub(vec,idx) && mask <> 0w0 + end + end + + fun addIntSet(n,vec) = + let + val idx = n div wordSize + val mask = 0w1 << (Word.fromInt (n mod wordSize)) + val size = Vector.length vec + in + if size>idx + then Vector.mapi (fn (i,x) => if i=idx then x||mask else x) (vec,0,NONE) + else Vector.tabulate + (idx+1,fn i => if i if i=idx then x && mask else x) (vec,0,NONE) + end + in normalize vec1 + end + + fun capIntSets(vec1,vec2) = + let + val l12 = Int.min(Vector.length vec1,Vector.length vec2) + val v12 = Vector.tabulate(l12,fn i => Vector.sub(vec1,i) && Vector.sub(vec2,i)) + in + normalize v12 + end + + fun cupIntSets(vec1,vec2) = + let + val (l1,l2) = (Vector.length vec1,Vector.length vec2) + val (shorter,longer,v) = if l1<=l2 then (l1,l2,vec2) else (l2,l1,vec1) + in + Vector.tabulate (longer,fn i => if i>=shorter then Vector.sub(v,i) + else Vector.sub(vec1,i) || Vector.sub(vec2,i)) + end + + fun diffIntSets(vec1,vec2) = + let + val (l1,l2) = (Vector.length vec1,Vector.length vec2) + val vec1 = Vector.tabulate + (l1,fn i => if i>=l2 then Vector.sub(vec1,i) + else Vector.sub(vec1,i) && !!(Vector.sub(vec2,i))) + in normalize vec1 + end + + fun IntList2Set l = List.foldl addIntSet emptyIntSet l + + fun IntSet2List vec = + let + val size = Vector.length vec + fun doOne (w,off,yet) = + let fun doit (i,mask) = + if i=wordSize then yet + else if w&&mask=0w0 then doit(i+1,mask<<0wx1) + else (off+i)::doit(i+1,mask<<0wx1) + in doit(0,0wx1) + end + fun doAll i = if i>=size then nil + else doOne(Vector.sub(vec,i),wordSize*i,(doAll (i+1))) + in doAll 0 + end + + fun compareIntSets (vec1,vec2:IntSet) = + let + val (l1,l2) = (Vector.length vec1,Vector.length vec2) + val (l12,ifEq) = case Int.compare(l1,l2) + of LESS => (l1,LESS) + | order => (l2,order) + fun doit i = if i>=l12 then ifEq + else case W.compare(Vector.sub(vec1,i),Vector.sub(vec2,i)) + of EQUAL => doit (i+1) + | order => order + in doit 0 + end + + val intShift = case Int.precision + of NONE => 0w0 + | SOME x => Word.fromInt(Int.max(wordSize-x+1,0)) + + fun hashIntSet vec = + case Vector.length vec + of 0 => 0w0 + | 1 => Word.fromInt(W.toInt(W.>>(Vector.sub(vec,0),intShift))) + | l => Word.fromInt(W.toInt(W.>>(Vector.sub(vec,0)+Vector.sub(vec,l-1),intShift))) + end diff -uNr fxp-2.0.orig/src/Util/intSets.sml fxp-2.0/src/Util/intSets.sml --- fxp-2.0.orig/src/Util/intSets.sml Sat Jun 26 02:42:57 2004 +++ fxp-2.0/src/Util/intSets.sml Fri Nov 2 13:40:50 2007 @@ -49,8 +49,8 @@ fun normalize (vec:IntSet) = let val max = Vector.foldli - (fn (i,w,max) => if w=0wx0 then i else max) 0 (vec,0,NONE) - in Vector.extract (vec,0,SOME max) + (fn (i,w,max) => if w=0wx0 then i else max) 0 vec + in VectorSlice.vector(VectorSlice.slice (vec,0,SOME max)) end val emptyIntSet = Vector.fromList nil : IntSet @@ -88,7 +88,7 @@ val size = Vector.length vec in if size>idx - then Vector.mapi (fn (i,x) => if i=idx then x||mask else x) (vec,0,NONE) + then Vector.mapi (fn (i,x) => if i=idx then x||mask else x) vec else Vector.tabulate (idx+1,fn i => if i if i=idx then x && mask else x) (vec,0,NONE) + (fn (i,x) => if i=idx then x && mask else x) vec end in normalize vec1 end diff -uNr fxp-2.0.orig/src/Util/util.cm fxp-2.0/src/Util/util.cm --- fxp-2.0.orig/src/Util/util.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Util/util.cm Thu Nov 1 08:58:59 2007 @@ -0,0 +1,38 @@ +Group + signature IntSets + structure IntSets + signature Options + structure Options + signature UtilCompare + structure UtilCompare + signature UtilError + structure UtilError + signature UtilHash + structure UtilHash + signature UtilInt + structure UtilInt + signature UtilList + structure UtilList + signature UtilString + structure UtilString +is + intLists.sml + +#if (SMLNJ_MINOR_VERSION > 40) + intSets.sml + utilString.sml + utilTime.sml + $/basis.cm +#else + intSets.orig.sml + utilString.orig.sml + utilTime.orig.sml +#endif + + options.sml + utilCompare.sml + utilError.sml + utilHash.sml + utilInt.sml + utilList.sml + diff -uNr fxp-2.0.orig/src/Util/utilString.orig.sml fxp-2.0/src/Util/utilString.orig.sml --- fxp-2.0.orig/src/Util/utilString.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Util/utilString.orig.sml Thu Nov 1 08:58:59 2007 @@ -0,0 +1,235 @@ +(*--------------------------------------------------------------------------*) +(* Structure: UtilString *) +(*--------------------------------------------------------------------------*) +signature UtilString = + sig + val quoteString : char -> string -> string + + val numberNth : int -> string + val prependAnA : string -> string + + val nBlanks : int -> string + val padxLeft : char -> string * int -> string + val padxRight : char -> string * int -> string + + val breakLines : int -> string -> string list + + val toUpperFirst : string -> string + val toUpperString : string -> string + + val Int2String : int -> string + + val Bool2xString : string * string -> bool -> string + val Bool2String : bool -> string + + val Option2xString : string * (('a -> string) -> 'a -> string) + -> ('a -> string) -> 'a option -> string + val Option2String0 : ('a -> string) -> 'a option -> string + val Option2String : ('a -> string) -> 'a option -> string + + val List2xString : string * string * string -> ('a -> string) -> 'a list -> string + val List2String0 : ('a -> string) -> 'a list -> string + val List2String : ('a -> string) -> 'a list -> string + + val Vector2xString : string * string * string -> ('a -> string) -> 'a vector -> string + val Vector2String : ('a -> string) -> 'a vector -> string + end + +structure UtilString : UtilString = + struct + fun quoteString q s = let val quote = String.implode [q] in quote^s^quote end + + (*--------------------------------------------------------------------*) + (* generate a string with the ordinal number of n, by appending *) + (* "st", "nd", "rd" or "th" to the number. *) + (*--------------------------------------------------------------------*) + fun numberNth n = + let val suffix = case n mod 9 + of 1 => "st" + | 2 => "nd" + | 3 => "rd" + | _ => "th" + in Int.toString n^suffix + end + + (*--------------------------------------------------------------------*) + (* is the single character c represented by a word starting with a *) + (* vocal in the alphabet? (l~ell->true, k~kay->false) *) + (*--------------------------------------------------------------------*) + fun vocalLetter c = + case Char.toLower c + of #"a" => true + | #"f" => true + | #"h" => true + | #"i" => true + | #"l" => true + | #"m" => true + | #"n" => true + | #"o" => true + | #"r" => true + | #"s" => true + | #"x" => true + | #"8" => true + | _ => false + + (*--------------------------------------------------------------------*) + (* is character c a vocal? *) + (*--------------------------------------------------------------------*) + fun isVocal c = + case Char.toLower c + of #"a" => true + | #"e" => true + | #"i" => true + | #"o" => true + | #"u" => true + | _ => false + + (*--------------------------------------------------------------------*) + (* does a word require "an" as undefinite article? true if: *) + (* - it is a single letter that starts with a vocal in the alphabet *) + (* - its first two letters are capitals, i.e. it is an abbreviation, *) + (* and its first letter starts with a vocal in the alphabet *) + (* - it has more than one letter, is not an abbreviation, and either *) + (* + it starts with a, i or o *) + (* + it starts with e and the second letter is not a u (europe) *) + (* + it starts with a u and continues with a vocal (very unlikely, *) + (* only in c.s., like uuencoded or uid *) + (* + it starts with a u, continues with a consonant not followed by *) + (* an i (like in unicode); that is something like un-... *) + (* This ruleset is not complete since it does not cover, e.g., the *) + (* word uninvented, but sufficient for most cases. *) + (* (Is english pronounciation decidable at all?) *) + (*--------------------------------------------------------------------*) + fun extendsAtoAn word = + case String.explode word + of nil => false + | [c] => vocalLetter c + | c1::c2::cs => if not (Char.isLower c1 orelse Char.isLower c2) + then vocalLetter c1 + else case Char.toLower c1 + of #"a" => true + | #"i" => true + | #"o" => true + | #"e" => Char.toLower c2 <> #"u" + | #"u" => if isVocal c2 then false + else (case cs + of nil => true + | c3::_ => Char.toLower c3 <> #"i") + | _ => false + + (*--------------------------------------------------------------------*) + (* add an undefinite article to a word. *) + (*--------------------------------------------------------------------*) + fun prependAnA word = if extendsAtoAn word then "an "^word else "a "^word + + (*--------------------------------------------------------------------*) + (* generate a list/string of n times character c. *) + (*--------------------------------------------------------------------*) + fun nCharsC c n = if n>0 then c::nCharsC c (n-1) else nil + fun nChars c n = String.implode (nCharsC c n) + val nBlanks = nChars #" " + + (*--------------------------------------------------------------------*) + (* add a minimal number of characters c to the left/right of a string *) + (* in order to make its length at least n. *) + (*--------------------------------------------------------------------*) + fun padxLeft c (s,n) = (nChars c (n-String.size s))^s + fun padxRight c (s,n) = s^(nChars c (n-String.size s)) + val padLeft = padxLeft #" " + val padRight = padxRight #" " + + (*--------------------------------------------------------------------*) + (* break a string into several lines of length width. *) + (*--------------------------------------------------------------------*) + fun breakLines width str = + let + val tokens = String.tokens (fn c => #" "=c) str + fun makeLine(toks,lines) = if null toks then lines + else (String.concat (rev toks))::lines + fun doit w (toks,lines) nil = makeLine(toks,lines) + | doit w (toks,lines) (one::rest) = + let + val l = String.size one + val w1 = w+l + in + if w1=width then doit 0 (nil,one::makeLine(toks,lines)) rest + else doit (l+1) ([" ",one],makeLine(toks,lines)) rest + end + in List.rev (doit 0 (nil,nil) tokens) + end + + (*--------------------------------------------------------------------*) + (* convert the first/all characters of a string to upper case *) + (*--------------------------------------------------------------------*) + fun toUpperFirst str = + case String.explode str + of nil => "" + | c::cs => String.implode (Char.toUpper c::cs) + fun toUpperString str = + String.implode(map Char.toUpper (String.explode str)) + + (*--------------------------------------------------------------------*) + (* return a string representation of an int, char or unit. *) + (*--------------------------------------------------------------------*) + val Int2String = Int.toString + val Char2String = Char.toString + fun Unit2String() = "()" + + (*--------------------------------------------------------------------*) + (* return a string representation of a boolean. *) + (*--------------------------------------------------------------------*) + fun Bool2xString (t,f) b = if b then t else f + val Bool2String = Bool2xString ("true","false") + + (*--------------------------------------------------------------------*) + (* return a string representation of an option. *) + (* the first arg is a string for the NONE case, the second a function *) + (* that converts x to a string, given a function for doing so. *) + (*--------------------------------------------------------------------*) + fun Option2xString (none,Some2String) x2String opt = + case opt + of NONE => none + | SOME x => Some2String x2String x + fun Option2String0 x2String = Option2xString ("",fn f => fn x => f x) x2String + fun Option2String x2String = Option2xString ("NONE",fn f => fn x => "SOME "^f x) x2String + + (*--------------------------------------------------------------------*) + (* return a string representation of list; start with pre, separate *) + (* with sep and finish with post; use X2String for each element. *) + (*--------------------------------------------------------------------*) + fun List2xString (pre,sep,post) X2String nil = pre^post + | List2xString (pre,sep,post) X2String l = + let fun doit nil _ = [post] + | doit (x::r) str = str::X2String x::doit r sep + in String.concat (doit l pre) + end + fun List2String X2String nil = "[]" + | List2String X2String l = + let fun doit nil _ = ["]"] + | doit (x::r) str = str::X2String x::doit r "," + in String.concat (doit l "[") + end + fun List2String0 X2String nil = "" + | List2String0 X2String l = + let fun doit nil _ = nil + | doit (x::r) str = str::X2String x::doit r " " + in String.concat (doit l "") + end + + (* a compiler bug in smlnj 110 makes the following uncompilable: *) + (* fun List2String X2String xs = List2xString ("[",",","]") X2String xs *) + (* fun List2String0 X2String xs = List2xString (""," ","") X2String xs *) + + (*--------------------------------------------------------------------*) + (* return a string representation of list; start with pre, separate *) + (* with sep and finish with post; use X2String for each element. *) + (*--------------------------------------------------------------------*) + fun Vector2xString (pre,sep,post) X2String vec = + if Vector.length vec=0 then pre^post + else String.concat + (pre::X2String(Vector.sub(vec,0)):: + Vector.foldri (fn (_,x,yet) => sep::X2String x::yet) [post] (vec,1,NONE)) + fun Vector2String X2String vec = Vector2xString ("#[",",","]") X2String vec + end diff -uNr fxp-2.0.orig/src/Util/utilString.sml fxp-2.0/src/Util/utilString.sml --- fxp-2.0.orig/src/Util/utilString.sml Sat Jun 26 02:42:57 2004 +++ fxp-2.0/src/Util/utilString.sml Thu Nov 1 08:58:58 2007 @@ -230,6 +230,9 @@ if Vector.length vec=0 then pre^post else String.concat (pre::X2String(Vector.sub(vec,0)):: - Vector.foldri (fn (_,x,yet) => sep::X2String x::yet) [post] (vec,1,NONE)) + VectorSlice.foldri + (fn (_,x,yet) => sep::X2String x::yet) + [post] + (VectorSlice.slice (vec,1,NONE))) fun Vector2String X2String vec = Vector2xString ("#[",",","]") X2String vec end diff -uNr fxp-2.0.orig/src/Util/utilTime.orig.sml fxp-2.0/src/Util/utilTime.orig.sml --- fxp-2.0.orig/src/Util/utilTime.orig.sml Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/Util/utilTime.orig.sml Thu Nov 1 08:59:00 2007 @@ -0,0 +1,37 @@ + + + +(*--------------------------------------------------------------------------*) +(* Structure: UtilTime *) +(* *) +(* Depends on: *) +(* *) +(* Exceptions raised by functions in this structure: *) +(* time : none *) +(* timeN : none *) +(*--------------------------------------------------------------------------*) +signature UtilTime = + sig + val time : ('a -> 'b) -> 'a -> 'b * {usr:Time.time, sys:Time.time, gc:Time.time} + val timeN : int -> ('a -> 'b) -> 'a -> 'b * {usr:Time.time, sys:Time.time, gc:Time.time} + end + +structure UtilTime : UtilTime = + struct + (*--------------------------------------------------------------------*) + (* run f on x, and measure the runtime. return the result and time. *) + (*--------------------------------------------------------------------*) + fun time f x = let val timer = Timer.startCPUTimer () + val y = f x + val ptime = Timer.checkCPUTimer timer + in (y,ptime) + end + + (*--------------------------------------------------------------------*) + (* run f n times on x, and measure the runtime. return the time. *) + (*--------------------------------------------------------------------*) + fun timeN n f x = + let fun iter m = if m<=1 then f x else (ignore (f x); iter (m-1)) + in time iter n + end + end diff -uNr fxp-2.0.orig/src/Util/utilTime.sml fxp-2.0/src/Util/utilTime.sml --- fxp-2.0.orig/src/Util/utilTime.sml Sat Jun 26 02:42:57 2004 +++ fxp-2.0/src/Util/utilTime.sml Thu Nov 1 08:58:58 2007 @@ -12,8 +12,8 @@ (*--------------------------------------------------------------------------*) signature UtilTime = sig - val time : ('a -> 'b) -> 'a -> 'b * {usr:Time.time, sys:Time.time, gc:Time.time} - val timeN : int -> ('a -> 'b) -> 'a -> 'b * {usr:Time.time, sys:Time.time, gc:Time.time} + val time : ('a -> 'b) -> 'a -> 'b * {usr:Time.time, sys:Time.time} + val timeN : int -> ('a -> 'b) -> 'a -> 'b * {usr:Time.time, sys:Time.time} end structure UtilTime : UtilTime = diff -uNr fxp-2.0.orig/src/config.cm fxp-2.0/src/config.cm --- fxp-2.0.orig/src/config.cm Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/config.cm Thu Nov 1 08:59:08 2007 @@ -0,0 +1,5 @@ +Group + structure Config +is + config.sml + diff -uNr fxp-2.0.orig/src/fxlib.cm fxp-2.0/src/fxlib.cm --- fxp-2.0.orig/src/fxlib.cm Sat Jun 26 02:42:57 2004 +++ fxp-2.0/src/fxlib.cm Thu Nov 1 08:59:05 2007 @@ -1,96 +1,64 @@ -Group is - Catalog/catData.sml - Catalog/catDtd.sml - Catalog/catError.sml - Catalog/catFile.sml - Catalog/catHooks.sml - Catalog/catOptions.sml - Catalog/catParams.sml - Catalog/catParse.sml - Catalog/catResolve.sml - Catalog/catalog.sml - Catalog/socatParse.sml - Parser/Base/base.sml - Parser/Base/baseData.sml - Parser/Base/baseString.sml - Parser/Dfa/dfa.sml - Parser/Dfa/dfaData.sml - Parser/Dfa/dfaError.sml - Parser/Dfa/dfaOptions.sml - Parser/Dfa/dfaPassOne.sml - Parser/Dfa/dfaPassThree.sml - Parser/Dfa/dfaPassTwo.sml - Parser/Dfa/dfaString.sml - Parser/Dfa/dfaUtil.sml - Parser/Dtd/dtdAttributes.sml - Parser/Dtd/dtdDeclare.sml - Parser/Dtd/dtdManager.sml - Parser/Error/errorData.sml - Parser/Error/errorMessage.sml - Parser/Error/errorString.sml - Parser/Error/errorUtil.sml - Parser/Error/errors.sml - Parser/Error/expected.sml - Parser/Params/dtd.sml - Parser/Params/hookData.sml - Parser/Params/hooks.sml - Parser/Params/ignore.sml - Parser/Params/parserOptions.sml - Parser/Params/resolve.sml - Parser/Parse/parseBase.sml - Parser/Parse/parseContent.sml - Parser/Parse/parseDecl.sml - Parser/Parse/parseDocument.sml - Parser/Parse/parseDtd.sml - Parser/Parse/parseLiterals.sml - Parser/Parse/parseMisc.sml - Parser/Parse/parseNames.sml - Parser/Parse/parseRefs.sml - Parser/Parse/parseTags.sml - Parser/Parse/parseXml.sml - Parser/entities.sml - Parser/version.sml - Unicode/Chars/charClasses.sml - Unicode/Chars/charVecDict.sml - Unicode/Chars/dataDict.sml - Unicode/Chars/testClasses.sml - Unicode/Chars/uniChar.sml - Unicode/Chars/uniClasses.sml - Unicode/Chars/uniRanges.sml - Unicode/Decode/decode.sml - Unicode/Decode/decodeError.sml - Unicode/Decode/decodeFile.sml - Unicode/Decode/decodeMisc.sml - Unicode/Decode/decodeUcs2.sml - Unicode/Decode/decodeUcs4.sml - Unicode/Decode/decodeUtf16.sml - Unicode/Decode/decodeUtf8.sml - Unicode/Decode/decodeUtil.sml - Unicode/Encode/encode.sml - Unicode/Encode/encodeBasic.sml - Unicode/Encode/encodeError.sml - Unicode/Encode/encodeMisc.sml - Unicode/Uri/uri.sml - Unicode/Uri/uriDecode.sml - Unicode/Uri/uriDict.sml - Unicode/Uri/uriEncode.sml - Unicode/encoding.sml - Util/SymDict/dict.sml - Util/SymDict/intDict.sml - Util/SymDict/intListDict.sml - Util/SymDict/intSetDict.sml - Util/SymDict/key.sml - Util/SymDict/stringDict.sml - Util/SymDict/symbolTable.sml - Util/intLists.sml - Util/intSets.sml - Util/options.sml - Util/utilCompare.sml - Util/utilError.sml - Util/utilHash.sml - Util/utilInt.sml - Util/utilList.sml - Util/utilString.sml - Util/utilTime.sml - config.sml - genRandom.sml +Library + structure Config + + (* Parser *) + functor Parse + + structure Base + + signature Entities + functor Entities + structure Version + + structure DfaData + structure Errors + + signature Resolve + structure ResolveNull + signature Dtd + structure Dtd + structure HookData + structure IgnoreHooks + signature Hooks + signature ParserOptions + functor ParserOptions + + (* Util *) + signature IntSets + structure IntSets + signature Options + structure Options + signature UtilError + structure UtilError + signature UtilList + structure UtilList + signature UtilString + structure UtilString + + (* Catalog *) + signature CatError + structure CatError + signature CatOptions + functor CatOptions + signature CatParams + functor ResolveCatalog + + (* Unicode *) + signature Encoding + structure Encoding + signature Encode + structure Encode + signature Uri + structure Uri + signature UniChar + structure UniChar + signature UniClasses + structure UniClasses +is + Parser/parser.cm + Util/util.cm + Catalog/catalog.cm + Unicode/unicode.cm + + config.cm + diff -uNr fxp-2.0.orig/src/fxlib.mlb fxp-2.0/src/fxlib.mlb --- fxp-2.0.orig/src/fxlib.mlb Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/fxlib.mlb Thu Nov 1 08:59:08 2007 @@ -0,0 +1,104 @@ +ann + "deadCode true" + "nonexhaustiveMatch warn" +in + local + $(MLTON_ROOT)/basis/basis.mlb + in + config.sml + Util/utilCompare.sml + Util/utilString.sml + Util/utilError.sml + Util/utilHash.sml + Util/utilInt.sml + Util/utilList.sml + Util/utilTime.sml + Util/intLists.sml + Util/intSets.sml + Util/options.sml + Util/SymDict/key.sml + Util/SymDict/dict.sml + Util/SymDict/symbolTable.sml + Util/SymDict/intSetDict.sml + Util/SymDict/intDict.sml + Util/SymDict/intListDict.sml + Util/SymDict/stringDict.sml + Unicode/Chars/uniChar.sml + Unicode/Chars/charClasses.sml + Unicode/Chars/charVecDict.sml + Unicode/Chars/dataDict.sml + Unicode/Chars/uniRanges.sml + Unicode/Chars/uniClasses.sml + Unicode/Chars/testClasses.sml + Unicode/Uri/uriDecode.sml + Unicode/Uri/uriEncode.sml + Unicode/Uri/uri.sml + Unicode/Uri/uriDict.sml + Unicode/encoding.sml + Unicode/Encode/encodeBasic.sml + Unicode/Encode/encodeError.sml + Unicode/Encode/encodeMisc.sml + Unicode/Encode/encode.sml + Unicode/Decode/decodeFile.sml + Unicode/Decode/decodeError.sml + Unicode/Decode/decodeMisc.sml + Unicode/Decode/decodeUtil.sml + Unicode/Decode/decodeUcs2.sml + Unicode/Decode/decodeUcs4.sml + Unicode/Decode/decodeUtf16.sml + Unicode/Decode/decodeUtf8.sml + Unicode/Decode/decode.sml + Parser/version.sml + Parser/Dfa/dfaData.sml + Parser/Dfa/dfaError.sml + Parser/Dfa/dfaOptions.sml + Parser/Dfa/dfaUtil.sml + Parser/Dfa/dfaPassOne.sml + Parser/Dfa/dfaPassTwo.sml + Parser/Dfa/dfaPassThree.sml + Parser/Dfa/dfaString.sml + Parser/Dfa/dfa.sml + Parser/Error/errorData.sml + Parser/Error/errorString.sml + Parser/Error/errorMessage.sml + Parser/Error/errorUtil.sml + Parser/Error/expected.sml + Parser/Error/errors.sml + Parser/Base/baseData.sml + Parser/Base/baseString.sml + Parser/Base/base.sml + Parser/Params/dtd.sml + Parser/Params/hookData.sml + Parser/Params/hooks.sml + Parser/Params/ignore.sml + Parser/Params/parserOptions.sml + Parser/Params/resolve.sml + Parser/entities.sml + Parser/Dtd/dtdDeclare.sml + Parser/Dtd/dtdAttributes.sml + Parser/Dtd/dtdManager.sml + Parser/Parse/parseBase.sml + Parser/Parse/parseNames.sml + Parser/Parse/parseMisc.sml + Parser/Parse/parseXml.sml + Parser/Parse/parseRefs.sml + Parser/Parse/parseLiterals.sml + Parser/Parse/parseTags.sml + Parser/Parse/parseDecl.sml + Parser/Parse/parseDtd.sml + Parser/Parse/parseContent.sml + Parser/Parse/parseDocument.sml + Catalog/catData.sml + Catalog/catDtd.sml + Catalog/catError.sml + Catalog/catParams.sml + Catalog/catFile.sml + Catalog/catHooks.sml + Catalog/catOptions.sml + Catalog/socatParse.sml + Catalog/catParse.sml + Catalog/catalog.sml + Catalog/catResolve.sml + genRandom.sml + end +end diff -uNr fxp-2.0.orig/src/fxlib.smlnet fxp-2.0/src/fxlib.smlnet --- fxp-2.0.orig/src/fxlib.smlnet Thu Jan 1 10:00:00 1970 +++ fxp-2.0/src/fxlib.smlnet Thu Nov 1 20:00:19 2007 @@ -0,0 +1,222 @@ +source . + +(* Catalog *) +source Catalog + +structure CatError=Catalog/catError.sml, + CatFile=Catalog/catFile.sml + +functor CatHooks=Catalog/catHooks.sml, + CatOptions=Catalog/catOptions.sml, + CatParse=Catalog/catParse.sml, + ResolveCatalog=Catalog/catResolve.sml, + Catalog=Catalog/catalog.sml, + SocatParse=Catalog/socatParse.sml + +signature CatDtd=Catalog/catDtd.sml, + CatError=Catalog/catError.sml, + CatFile=Catalog/catFile.sml, + CatHooks=Catalog/catHooks.sml, + CatOptions=Catalog/catOptions.sml, + CatParse=Catalog/catParse.sml, + Catalog=Catalog/catalog.sml, + SocatParse=Catalog/socatParse.sml, + CatParams=Catalog/catParams.sml + +(* Parser *) +source Parser + +signature Entities=Parser/entities.sml + +(* Parser/Base *) +source Parser/Base + +signature BaseString=Parser/Base/baseString.sml + +(* Parser/Dfa *) +source Parser/Dfa + +signature Dfa=Parser/Dfa/dfa.sml, + DfaData=Parser/Dfa/dfaData.sml, + DfaError=Parser/Dfa/dfaError.sml, + DfaOptions=Parser/Dfa/dfaOptions.sml, + DfaPassOne=Parser/Dfa/dfaPassOne.sml, + DfaPassTwo=Parser/Dfa/dfaPassTwo.sml, + DfaPassThree=Parser/Dfa/dfaPassThree.sml, + DfaString=Parser/Dfa/dfaString.sml, + DfaUtil=Parser/Dfa/dfaUtil.sml + +structure DfaBase=Parser/Dfa/dfaData.sml + +(* Parser/Dtd *) +source Parser/Dtd + +signature DtdManager=Parser/Dtd/dtdManager.sml + +(* Parser/Error *) +source Parser/Error + +signature ErrorMessage=Parser/Error/errorMessage.sml, + ErrorString=Parser/Error/errorString.sml, + ErrorUtil=Parser/Error/errorUtil.sml + +(* Parser/Params *) +source Parser/Params + +signature Dtd=Parser/Params/dtd.sml, + Hooks=Parser/Params/hooks.sml, + ParserOptions=Parser/Params/parserOptions.sml, + Resolve=Parser/Params/resolve.sml + +structure ResolveNull=Parser/Params/resolve.sml, + IgnoreHooks=Parser/Params/ignore.sml + +functor ParserOptions=Parser/Params/parserOptions.sml + +(* Parser/Parse *) +source Parser/Parse + +signature ParseBase=Parser/Parse/parseBase.sml, + ParseContent=Parser/Parse/parseContent.sml, + ParseDecl=Parser/Parse/parseDecl.sml, + ParseDtd=Parser/Parse/parseDtd.sml, + ParseLiterals=Parser/Parse/parseLiterals.sml, + ParseMisc=Parser/Parse/parseMisc.sml, + ParseNames=Parser/Parse/parseNames.sml, + ParseRefs=Parser/Parse/parseRefs.sml, + ParseTags=Parser/Parse/parseTags.sml, + ParseXml=Parser/Parse/parseXml.sml + + +functor Parse=Parser/Parse/parseDocument.sml + +(* Unicode *) +source Unicode + +signature Encoding=Unicode/encoding.sml + +(* Unicode/Chars *) +source Unicode/Chars + +signature CharClasses=Unicode/Chars/charClasses.sml, + UniChar=Unicode/Chars/uniChar.sml, + UniClasses=Unicode/Chars/uniClasses.sml + +structure VectorDict=Unicode/Chars/charVecDict.sml, + DataSymTab=Unicode/Chars/dataDict.sml, + KeyData=Unicode/Chars/dataDict.sml + +(* Unicode/Decode *) +source Unicode/Decode + +signature Decode=Unicode/Decode/decode.sml, + DecodeError=Unicode/Decode/decodeError.sml, + DecodeFile=Unicode/Decode/decodeFile.sml, + DecodeMisc=Unicode/Decode/decodeMisc.sml, + DecodeUcs2=Unicode/Decode/decodeUcs2.sml, + DecodeUcs4=Unicode/Decode/decodeUcs4.sml, + DecodeUtf16=Unicode/Decode/decodeUtf16.sml, + DecodeUtf8=Unicode/Decode/decodeUtf8.sml, + DecodeUtil=Unicode/Decode/decodeUtil.sml + +(* Unicode/Encode *) +source Unicode/Encode + +signature Encode=Unicode/Encode/encode.sml, + EncodeBasic=Unicode/Encode/encodeBasic.sml, + EncodeError=Unicode/Encode/encodeError.sml, + EncodeMisc=Unicode/Encode/encodeMisc.sml + +(* Unicode/Uri *) +source Unicode/Uri + +signature Uri=Unicode/Uri/uri.sml, + UriDecode=Unicode/Uri/uriDecode.sml, + UriEncode=Unicode/Uri/uriEncode.sml + +structure KeyUri=Unicode/Uri/uriDict.sml + +(* Util *) +source Util + +signature IntLists=Util/intLists.sml, + IntSets=Util/intSets.sml, + Options=Util/options.sml, + UtilCompare=Util/utilCompare.sml, + UtilError=Util/utilError.sml, + UtilHash=Util/utilHash.sml, + UtilInt=Util/utilInt.sml, + UtilList=Util/utilList.sml, + UtilString=Util/utilString.sml, + UtilTime=Util/utilTime.sml + +(* Util/SymDict *) +source Util/SymDict + +structure IntSymTab=Util/SymDict/intDict.sml, + IntListSymTab=Util/SymDict/intListDict.sml, + IntSetSymTab=Util/SymDict/intSetDict.sml, + KeyString=Util/SymDict/stringDict.sml, + KeyIntSet=Util/SymDict/intSetDict.sml + +functor SymTable=Util/SymDict/symbolTable.sml + +signature SymTable=Util/SymDict/symbolTable.sml, + Key=Util/SymDict/key.sml, + Dict=Util/SymDict/dict.sml + +(* Apps *) + +(* Apps/Canon *) +source Apps/Canon +signature CanonEncode=Apps/Canon/canonEncode.sml, + CanonOptions=Apps/Canon/canonOptions.sml, + CanonOutput=Apps/Canon/canonOutput.sml + +(* Apps/Copy *) +source Apps/Copy +signature CopyEncode=Apps/Copy/copyEncode.sml, + CopyOptions=Apps/Copy/copyOptions.sml, + CopyOutput=Apps/Copy/copyOutput.sml + +(* Apps/Null *) +source Apps/Null +signature NullOptions=Apps/Null/nullOptions.sml + +(* Apps/Esis *) +source Apps/Esis +signature Esis=Apps/Esis/esis.sml, + EsisOptions=Apps/Esis/esisOptions.sml, + EsisOutput=Apps/Esis/esisOutput.sml + +(* Apps/Viz *) +source Apps/Viz +signature Viz=Apps/Viz/viz.sml, + VizOptions=Apps/Viz/vizOptions.sml + +(* Compiling Apps (comment out to use the library): *) +export CanonProg +out fxcanon.exe +make +export + +export CopyProg +out fxcopy.exe +make +export + +export NullProg +out fxp.exe +make +export + +export EsisProg +out fxesis.exe +make +export + +export VizProg +out fxviz.exe +make +export +