diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f1e3d20 --- /dev/null +++ b/.gitignore @@ -0,0 +1,252 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# DNX +project.lock.json +artifacts/ + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 
+DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# TODO: Comment the next line if you want to checkin your web deploy settings +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/packages/* +# except build/, which is used as an MSBuild target. +!**/packages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/packages/repositories.config +# NuGet v3's project.json files produces more ignoreable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.pfx +*.publishsettings +node_modules/ +orleans.codegen.cs + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. 
Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# JetBrains Rider +.idea/ +*.sln.iml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5c9bdb3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2011 Roman Ivantsov + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Sanchime.Irony.sln b/Sanchime.Irony.sln new file mode 100644 index 0000000..792b337 --- /dev/null +++ b/Sanchime.Irony.sln @@ -0,0 +1,49 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.2.32630.192 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{24CE2713-8206-4C06-9397-7AD757E7D002}" + ProjectSection(SolutionItems) = preProject + build\local_publish.ps1 = build\local_publish.ps1 + build\subst_version.ps1 = build\subst_version.ps1 + EndProjectSection +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sanchime.Irony", "src\Irony\Sanchime.Irony.csproj", "{0E7499D3-3692-4F0A-AF22-695AAFF46A5F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sanchime.Irony.Interpreter", "src\Irony.Interpreter\Sanchime.Irony.Interpreter.csproj", "{A464E8CE-0EF5-41DB-AD71-6982F1BAE0D3}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sanchime.Irony.Tests", "src\Irony.Tests\Sanchime.Irony.Tests.csproj", "{9177CCEE-4279-4A3C-9967-E1B0E9272521}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sanchime.Irony.SampleApp", "src\Irony.SampleApp\Sanchime.Irony.SampleApp.csproj", "{184455FD-0D48-42E1-AB72-00E6A0A452EC}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {0E7499D3-3692-4F0A-AF22-695AAFF46A5F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0E7499D3-3692-4F0A-AF22-695AAFF46A5F}.Debug|Any CPU.Build.0 = Debug|Any CPU + 
{0E7499D3-3692-4F0A-AF22-695AAFF46A5F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0E7499D3-3692-4F0A-AF22-695AAFF46A5F}.Release|Any CPU.Build.0 = Release|Any CPU + {A464E8CE-0EF5-41DB-AD71-6982F1BAE0D3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A464E8CE-0EF5-41DB-AD71-6982F1BAE0D3}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A464E8CE-0EF5-41DB-AD71-6982F1BAE0D3}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A464E8CE-0EF5-41DB-AD71-6982F1BAE0D3}.Release|Any CPU.Build.0 = Release|Any CPU + {9177CCEE-4279-4A3C-9967-E1B0E9272521}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9177CCEE-4279-4A3C-9967-E1B0E9272521}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9177CCEE-4279-4A3C-9967-E1B0E9272521}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9177CCEE-4279-4A3C-9967-E1B0E9272521}.Release|Any CPU.Build.0 = Release|Any CPU + {184455FD-0D48-42E1-AB72-00E6A0A452EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {184455FD-0D48-42E1-AB72-00E6A0A452EC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {184455FD-0D48-42E1-AB72-00E6A0A452EC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {184455FD-0D48-42E1-AB72-00E6A0A452EC}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {3FBB2DED-F7C5-4EA6-B8D6-50AD8F561D33} + EndGlobalSection +EndGlobal diff --git a/src/Irony.Interpreter/Ast/AstContext/InterpreterAstContext.cs b/src/Irony.Interpreter/Ast/AstContext/InterpreterAstContext.cs new file mode 100644 index 0000000..934a35e --- /dev/null +++ b/src/Irony.Interpreter/Ast/AstContext/InterpreterAstContext.cs @@ -0,0 +1,19 @@ +using Sanchime.Irony.Ast; +using Sanchime.Irony.Interpreter.Ast.PrimitiveNodes; + +namespace Sanchime.Irony.Interpreter.Ast +{ + //Extension of AstContext + public class InterpreterAstContext : AstContext + { + public readonly OperatorHandler OperatorHandler; + + public InterpreterAstContext(LanguageData 
language, OperatorHandler operatorHandler = null) : base(language) + { + OperatorHandler = operatorHandler ?? new OperatorHandler(language.Grammar.CaseSensitive); + base.DefaultIdentifierNodeType = typeof(IdentifierNode); + base.DefaultLiteralNodeType = typeof(LiteralValueNode); + base.DefaultNodeType = null; + } + }//class +}//ns \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/AstContext/OperatorHandler.cs b/src/Irony.Interpreter/Ast/AstContext/OperatorHandler.cs new file mode 100644 index 0000000..d5bbc73 --- /dev/null +++ b/src/Irony.Interpreter/Ast/AstContext/OperatorHandler.cs @@ -0,0 +1,147 @@ +using System.Linq.Expressions; + +namespace Sanchime.Irony.Interpreter.Ast +{ + public class OperatorInfo //describes one registered operator: symbol, expression type, precedence, associativity + { + public string Symbol; + public ExpressionType ExpressionType; + public int Precedence; + public Associativity Associativity; + } + + public class OperatorInfoDictionary : Dictionary<string, OperatorInfo> //maps an operator symbol to its OperatorInfo + { + public OperatorInfoDictionary(bool caseSensitive) : base(caseSensitive ? StringComparer.Ordinal : StringComparer.OrdinalIgnoreCase) //key comparison follows the caseSensitive flag + { + } + + public void Add(string symbol, ExpressionType expressionType, int precedence, Associativity associativity = Associativity.Left) //registers an operator; an existing entry for the same symbol is overwritten + { + var info = new OperatorInfo() + { + Symbol = symbol, + ExpressionType = expressionType, + Precedence = precedence, + Associativity = associativity + }; + this[symbol] = info; + } + }//class + + public class OperatorHandler + { + private OperatorInfoDictionary _registeredOperators; + + public OperatorHandler(bool languageCaseSensitive) + { + _registeredOperators = new OperatorInfoDictionary(languageCaseSensitive); + BuildDefaultOperatorMappings(); //virtual call: an override runs before the derived constructor body + } + + public ExpressionType GetOperatorExpressionType(string symbol) //returns NotAnExpression when the symbol is not registered + { + OperatorInfo opInfo; + if (_registeredOperators.TryGetValue(symbol, out opInfo)) + return opInfo.ExpressionType; + return CustomExpressionTypes.NotAnExpression; + } + + public virtual ExpressionType GetUnaryOperatorExpressionType(string symbol) + { + return 
symbol.ToLowerInvariant() switch + { + "+" => ExpressionType.UnaryPlus, + "-" => ExpressionType.Negate, + "!" or "not" or "~" => ExpressionType.Not, //all three spellings map to Not + _ => CustomExpressionTypes.NotAnExpression, + }; + } + + public virtual ExpressionType GetBinaryOperatorForAugmented(ExpressionType augmented) //maps a compound-assignment type to its plain binary operator; NotAnExpression when there is no mapping + { + return augmented switch + { + ExpressionType.AddAssign or ExpressionType.AddAssignChecked => ExpressionType.AddChecked, + ExpressionType.AndAssign => ExpressionType.And, + ExpressionType.Decrement => ExpressionType.SubtractChecked, //NOTE(review): Decrement is mapped but Increment is not - confirm this asymmetry is intended + ExpressionType.DivideAssign => ExpressionType.Divide, + ExpressionType.ExclusiveOrAssign => ExpressionType.ExclusiveOr, + ExpressionType.LeftShiftAssign => ExpressionType.LeftShift, + ExpressionType.ModuloAssign => ExpressionType.Modulo, + ExpressionType.MultiplyAssign or ExpressionType.MultiplyAssignChecked => ExpressionType.MultiplyChecked, + ExpressionType.OrAssign => ExpressionType.Or, + ExpressionType.RightShiftAssign => ExpressionType.RightShift, + ExpressionType.SubtractAssign or ExpressionType.SubtractAssignChecked => ExpressionType.SubtractChecked, + _ => CustomExpressionTypes.NotAnExpression, + }; + } + + public virtual OperatorInfoDictionary BuildDefaultOperatorMappings() //clears the registered operators and repopulates them with the defaults below + { + var dict = _registeredOperators; + dict.Clear(); + int p = 0; //precedence; raised by 10 before each group of operators + + p += 10; + dict.Add("=", ExpressionType.Assign, p); + dict.Add("+=", ExpressionType.AddAssignChecked, p); + dict.Add("-=", ExpressionType.SubtractAssignChecked, p); + dict.Add("*=", ExpressionType.MultiplyAssignChecked, p); + dict.Add("/=", ExpressionType.DivideAssign, p); + dict.Add("%=", ExpressionType.ModuloAssign, p); + dict.Add("|=", ExpressionType.OrAssign, p); + dict.Add("&=", ExpressionType.AndAssign, p); + dict.Add("^=", ExpressionType.ExclusiveOrAssign, p); + + p += 10; + dict.Add("==", ExpressionType.Equal, p); + dict.Add("!=", ExpressionType.NotEqual, p); + dict.Add("<>", ExpressionType.NotEqual, p); //second spelling of NotEqual + + p += 10; + dict.Add("<", ExpressionType.LessThan, p); + 
dict.Add("<=", ExpressionType.LessThanOrEqual, p); + dict.Add(">", ExpressionType.GreaterThan, p); + dict.Add(">=", ExpressionType.GreaterThanOrEqual, p); + + p += 10; + dict.Add("|", ExpressionType.Or, p); + dict.Add("or", ExpressionType.Or, p); + dict.Add("||", ExpressionType.OrElse, p); + dict.Add("orelse", ExpressionType.OrElse, p); + dict.Add("^", ExpressionType.ExclusiveOr, p); + dict.Add("xor", ExpressionType.ExclusiveOr, p); + + p += 10; + dict.Add("&", ExpressionType.And, p); + dict.Add("and", ExpressionType.And, p); + dict.Add("&&", ExpressionType.AndAlso, p); + dict.Add("andalso", ExpressionType.AndAlso, p); + + p += 10; + dict.Add("!", ExpressionType.Not, p); + dict.Add("not", ExpressionType.Not, p); + + p += 10; + dict.Add("<<", ExpressionType.LeftShift, p); + dict.Add(">>", ExpressionType.RightShift, p); + + p += 10; + dict.Add("+", ExpressionType.AddChecked, p); + dict.Add("-", ExpressionType.SubtractChecked, p); + + p += 10; + dict.Add("*", ExpressionType.MultiplyChecked, p); + dict.Add("/", ExpressionType.Divide, p); + dict.Add("%", ExpressionType.Modulo, p); + dict.Add("**", ExpressionType.Power, p); + + p += 10; + dict.Add("??", ExpressionType.Coalesce, p); + dict.Add("?", ExpressionType.Conditional, p); + + return dict; + }//method + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Base/AstInterfaces.cs b/src/Irony.Interpreter/Ast/Base/AstInterfaces.cs new file mode 100644 index 0000000..c1853be --- /dev/null +++ b/src/Irony.Interpreter/Ast/Base/AstInterfaces.cs @@ -0,0 +1,44 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. 
+ * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using System.Linq.Expressions; + +namespace Sanchime.Irony.Interpreter.Ast +{ + //This interface is expected by Irony's Grammar Explorer. + public interface ICallTarget + { + object Call(ScriptThread thread, object[] parameters); + } + + //Simple visitor interface: BeginVisit/EndVisit bracket the visit of a node + public interface IAstVisitor + { + void BeginVisit(IVisitableNode node); + + void EndVisit(IVisitableNode node); + } + + public interface IVisitableNode //a node that accepts an IAstVisitor + { + void AcceptVisitor(IAstVisitor visitor); + } + + public interface IOperatorHelper //resolves operator symbols to ExpressionType values + { + ExpressionType GetOperatorExpressionType(string symbol); + + ExpressionType GetUnaryOperatorExpressionType(string symbol); + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Base/AstNode.cs b/src/Irony.Interpreter/Ast/Base/AstNode.cs new file mode 100644 index 0000000..a59d60e --- /dev/null +++ b/src/Irony.Interpreter/Ast/Base/AstNode.cs @@ -0,0 +1,227 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using Sanchime.Irony.Interpreter.Ast.SpecialNodes; +using Sanchime.Irony.Interpreter.Scopes; +using System.Linq.Expressions; + +namespace Sanchime.Irony.Interpreter.Ast +{ + public static class CustomExpressionTypes + { + public const ExpressionType NotAnExpression = (ExpressionType)(-1); //sentinel value used when a symbol maps to no operator + } + + public class AstNodeList : List<AstNode> //list of AST nodes + { } + + //Base AST node class + public partial class AstNode : IAstNodeInit, IBrowsableAstNode, IVisitableNode + { + public AstNode Parent; + public BnfTerm Term; + public SourceSpan Span { get; set; } + public AstNodeFlags Flags; + protected ExpressionType ExpressionType = CustomExpressionTypes.NotAnExpression; + protected object LockObject = new object(); + + //Used for pointing to error location. For most nodes it would be the location of the node itself. + // One exception is BinaryOperationNode: when we get "Division by zero" error evaluating + // x = (5 + 3) / (2 - 2) + // it is better to point to "/" as error location, rather than the first "(" - which is the start + // location of binary expression. + public SourceLocation ErrorAnchor; + + //UseType is set by parent + public NodeUseType UseType = NodeUseType.Unknown; + + // Role is a free-form string used as prefix in ToString() representation of the node. + // Node's parent can set it to "property name" or role of the child node in parent's node currentFrame.Context. + public string Role; + + // Default AstNode.ToString() returns 'Role: AsString', which is used for showing node in AST tree. + public virtual string AsString { get; protected set; } + + public readonly AstNodeList ChildNodes = new AstNodeList(); //List of child nodes + + //Reference to Evaluate method implementation. Initially set to DoEvaluate virtual method. 
+ public EvaluateMethod Evaluate; + + public ValueSetterMethod SetValue; + + // Public default constructor + public AstNode() + { + Evaluate = DoEvaluate; + SetValue = DoSetValue; + } + + public SourceLocation Location + { get { return Span.Location; } } + + #region IAstNodeInit Members + + public virtual void Init(AstContext context, ParseTreeNode treeNode) //copies term and span from the parse node and registers this node as its AstNode + { + Term = treeNode.Term; + Span = treeNode.Span; + ErrorAnchor = Location; + treeNode.AstNode = this; + AsString = Term == null ? GetType().Name : Term.Name; + } + + #endregion + + //ModuleNode - computed on demand: the topmost ancestor (the node itself when it has no parent), cached after first use + public AstNode ModuleNode + { + get + { + if (_moduleNode == null) + { + _moduleNode = Parent == null ? this : Parent.ModuleNode; + } + return _moduleNode; + } + set { _moduleNode = value; } + } + + private AstNode _moduleNode; + + #region virtual methods: DoEvaluate, SetValue, IsConstant, SetIsTail, GetDependentScopeInfo + + public virtual void Reset() //clears the cached module node and re-points Evaluate at DoEvaluate, recursively for all children + { + _moduleNode = null; + Evaluate = DoEvaluate; //NOTE(review): SetValue is not restored to DoSetValue here, unlike in the constructor - confirm intended + foreach (var child in ChildNodes) + child.Reset(); + } + + //By default the Evaluate field points to this method. + protected virtual object DoEvaluate(ScriptThread thread) + { + //These 2 lines are standard prolog/epilog statements. Place them in every Evaluate and SetValue implementations. + thread.CurrentNode = this; //standard prolog + thread.CurrentNode = Parent; //standard epilog + return null; + } + + public virtual void DoSetValue(ScriptThread thread, object value) + { + //Place the prolog/epilog lines in every implementation of SetValue method (see DoEvaluate above) + } + + public virtual bool IsConstant() + { + return false; + } + + /// <summary> + /// Sets a flag indicating that the node is in tail position. The value is propagated from parent to children. + /// Should propagate this call to appropriate children. + /// </summary> + public virtual void SetIsTail() + { + Flags |= AstNodeFlags.IsTail; + } + + /// <summary> + /// Dependent scope is a scope produced by the node. 
For ex, FunctionDefNode defines a scope + /// </summary> + public virtual ScopeInfo DependentScopeInfo + { + get { return _dependentScope; } + set { _dependentScope = value; } + } + + private ScopeInfo _dependentScope; + + #endregion + + #region IBrowsableAstNode Members + + public virtual System.Collections.IEnumerable GetChildNodes() + { + return ChildNodes; + } + + public int Position + { + get { return Span.Location.Position; } + } + + #endregion + + #region Visitors, Iterators + + //the first primitive Visitor facility: BeginVisit, then all children in order, then EndVisit + public virtual void AcceptVisitor(IAstVisitor visitor) + { + visitor.BeginVisit(this); + if (ChildNodes.Count > 0) + foreach (AstNode node in ChildNodes) + node.AcceptVisitor(visitor); + visitor.EndVisit(this); + } + + //Node traversal: returns this node followed by all its descendants, parents before children + public IEnumerable<AstNode> GetAll() + { + AstNodeList result = new AstNodeList(); + AddAll(result); + return result; + } + + private void AddAll(AstNodeList list) + { + list.Add(this); + foreach (AstNode child in ChildNodes) + if (child != null) + child.AddAll(list); + } + + #endregion + + #region overrides: ToString + + public override string ToString() + { + return string.IsNullOrEmpty(Role) ? AsString : Role + ": " + AsString; + } + + #endregion + + #region Utility methods: AddChild, HandleError + + protected AstNode AddChild(string role, ParseTreeNode childParseNode) + { + return AddChild(NodeUseType.Unknown, role, childParseNode); + } + + protected AstNode AddChild(NodeUseType useType, string role, ParseTreeNode childParseNode) + { + var child = (AstNode)childParseNode.AstNode; + if (child == null) + child = new NullNode(childParseNode.Term); //put a stub to throw an exception with clear message on attempt to evaluate. 
+ child.Role = role; + child.Parent = this; if (useType != NodeUseType.Unknown) child.UseType = useType; //record how the parent uses the child; Unknown leaves the child's current value untouched + ChildNodes.Add(child); + return child; + } + + #endregion + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Base/BasicTypes.cs b/src/Irony.Interpreter/Ast/Base/BasicTypes.cs new file mode 100644 index 0000000..d34a8ca --- /dev/null +++ b/src/Irony.Interpreter/Ast/Base/BasicTypes.cs @@ -0,0 +1,42 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Ast +{ + public delegate object EvaluateMethod(ScriptThread thread); + + public delegate void ValueSetterMethod(ScriptThread thread, object value); + + [Flags] + public enum AstNodeFlags + { + None = 0x0, + IsTail = 0x01, //the node is in tail position + //IsScope = 0x02, //node defines scope for local variables + } + + //Not a [Flags] enum: the members are sequential, mutually exclusive values, not bit masks + public enum NodeUseType + { + Unknown, + Name, //identifier used as a Name container - system would not use its Evaluate method directly + CallTarget, + ValueRead, + ValueWrite, + ValueReadWrite, + Parameter, + Keyword, + SpecialSymbol, + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Expressions/BinaryOperationNode.cs b/src/Irony.Interpreter/Ast/Expressions/BinaryOperationNode.cs new file mode 100644 index 0000000..f8bb2f3 --- /dev/null +++ b/src/Irony.Interpreter/Ast/Expressions/BinaryOperationNode.cs @@ -0,0 +1,138 @@ +#region License + +/* 
********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using System.Linq.Expressions; + +namespace Sanchime.Irony.Interpreter.Ast.Expressions +{ + public class BinaryOperationNode : AstNode //binary expression node: left operand, operator, right operand + { + public AstNode Left, Right; + public string OpSymbol; + public ExpressionType Op; + private OperatorImplementation _lastUsed; + private object _constValue; + private int _failureCount; + + public BinaryOperationNode() + { } + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); //expected order: left operand, operator, right operand + Left = AddChild("Arg", nodes[0]); + Right = AddChild("Arg", nodes[2]); + var opToken = nodes[1].FindToken(); + OpSymbol = opToken.Text; + var ictxt = (InterpreterAstContext)context; //direct cast: a wrong context type fails here with InvalidCastException instead of a later NullReferenceException + Op = ictxt.OperatorHandler.GetOperatorExpressionType(OpSymbol); + // Set error anchor to operator, so on error (Division by zero) the explorer will point to + // operator node as location, not to the very beginning of the first operand. + ErrorAnchor = opToken.Location; + AsString = Op + "(operator)"; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + //assign implementation method + Evaluate = Op switch + { + ExpressionType.AndAlso => EvaluateAndAlso, + ExpressionType.OrElse => EvaluateOrElse, + _ => DefaultEvaluateImplementation, + }; + // actually evaluate and get the result. 
+ var result = Evaluate(thread); + // Check if result is constant - if yes, save the value and switch to method that directly returns the result. + if (IsConstant()) + { + _constValue = result; + AsString = Op + "(operator) Const=" + _constValue; + Evaluate = EvaluateConst; + } + thread.CurrentNode = Parent; //standard epilog + return result; + } + + private object EvaluateAndAlso(ScriptThread thread) //short-circuit AND: the right operand is evaluated only when the left value is true + { + var leftValue = Left.Evaluate(thread); + if (!thread.Runtime.IsTrue(leftValue)) return leftValue; //if false return immediately + return Right.Evaluate(thread); + } + + private object EvaluateOrElse(ScriptThread thread) //short-circuit OR: the right operand is evaluated only when the left value is not true + { + var leftValue = Left.Evaluate(thread); + if (thread.Runtime.IsTrue(leftValue)) return leftValue; + return Right.Evaluate(thread); + } + + protected object EvaluateFast(ScriptThread thread) //tries the cached operator implementation first and falls back to the runtime lookup + { + thread.CurrentNode = this; //standard prolog + var arg1 = Left.Evaluate(thread); + var arg2 = Right.Evaluate(thread); + //If we have _lastUsed, go straight for it; if types mismatch it will throw + if (_lastUsed != null) + { + try + { + var res = _lastUsed.EvaluateBinary(arg1, arg2); + thread.CurrentNode = Parent; //standard epilog + return res; + } + catch //catch-all: any failure of the cached implementation falls through to the normal evaluation below + { + _lastUsed = null; + _failureCount++; + // if failed more than 3 times, change to method without direct try + if (_failureCount > 3) + Evaluate = DefaultEvaluateImplementation; + } //catch + }// if _lastUsed + // go for normal evaluation + var result = thread.Runtime.ExecuteBinaryOperator(Op, arg1, arg2, ref _lastUsed); + thread.CurrentNode = Parent; //standard epilog + return result; + }//method + + protected object DefaultEvaluateImplementation(ScriptThread thread) //evaluates both operands, then asks the runtime to execute the operator + { + thread.CurrentNode = this; //standard prolog + var arg1 = Left.Evaluate(thread); + var arg2 = Right.Evaluate(thread); + var result = thread.Runtime.ExecuteBinaryOperator(Op, arg1, arg2, ref _lastUsed); + thread.CurrentNode = Parent; //standard epilog + return result; + }//method + + private object EvaluateConst(ScriptThread thread) 
+ { + return _constValue; + } + + public override bool IsConstant() + { + if (_isConstant) return true; + _isConstant = Left.IsConstant() && Right.IsConstant(); + return _isConstant; + } + + private bool _isConstant; + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Expressions/ExpressionListNode.cs b/src/Irony.Interpreter/Ast/Expressions/ExpressionListNode.cs new file mode 100644 index 0000000..f3c9ee9 --- /dev/null +++ b/src/Irony.Interpreter/Ast/Expressions/ExpressionListNode.cs @@ -0,0 +1,44 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.Expressions +{ + //A node representing an expression list - for example, the list of argument expressions in a function call + public class ExpressionListNode : AstNode + { + public override void Init(AstContext context, ParseTreeNode treeNode) //adds every child parse node as a Parameter child with role "expr" + { + base.Init(context, treeNode); + foreach (var child in treeNode.ChildNodes) + { + AddChild(NodeUseType.Parameter, "expr", child); + } + AsString = "Expression list"; + } + + protected override object DoEvaluate(ScriptThread thread) //evaluates the children in order and returns their values as an object[] + { + thread.CurrentNode = this; //standard prolog + var values = new object[ChildNodes.Count]; + for (int i = 0; i < values.Length; i++) + { + values[i] = ChildNodes[i].Evaluate(thread); + } + thread.CurrentNode = Parent; //standard epilog + return values; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Expressions/IfNode.cs b/src/Irony.Interpreter/Ast/Expressions/IfNode.cs new file mode 100644 index 0000000..19c1a2d --- /dev/null +++ b/src/Irony.Interpreter/Ast/Expressions/IfNode.cs @@ -0,0 +1,64 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.Expressions +{ + public class IfNode : AstNode //conditional node: evaluates Test, then the IfTrue branch or the optional IfFalse branch + { + public AstNode Test; + public AstNode IfTrue; + public AstNode IfFalse; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); //expected order: test, true branch, optional false branch + Test = AddChild("Test", nodes[0]); + IfTrue = AddChild("IfTrue", nodes[1]); + if (nodes.Count > 2) + IfFalse = AddChild("IfFalse", nodes[2]); + } + + protected override object DoEvaluate(ScriptThread thread) //returns the value of the selected branch + { + thread.CurrentNode = this; //standard prolog + object result = null; //stays null when the selected branch is absent + var test = Test.Evaluate(thread); + var isTrue = thread.Runtime.IsTrue(test); + if (isTrue) + { + if (IfTrue != null) + result = IfTrue.Evaluate(thread); + } + else + { + if (IfFalse != null) + result = IfFalse.Evaluate(thread); + } + thread.CurrentNode = Parent; //standard epilog + return result; + } + + public override void SetIsTail() //marks this node and both branches as being in tail position + { + base.SetIsTail(); + if (IfTrue != null) + IfTrue.SetIsTail(); + if (IfFalse != null) + IfFalse.SetIsTail(); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Expressions/IncDecNode.cs b/src/Irony.Interpreter/Ast/Expressions/IncDecNode.cs new file mode 100644 index 0000000..144278c --- /dev/null +++ b/src/Irony.Interpreter/Ast/Expressions/IncDecNode.cs @@ -0,0 +1,68 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using System.Linq.Expressions; + +namespace Sanchime.Irony.Interpreter.Ast.Expressions +{ + public class IncDecNode : AstNode + { + public bool IsPostfix; + public string OpSymbol; + public string BinaryOpSymbol; //corresponding binary operation: + for ++, - for -- + public ExpressionType BinaryOp; + public AstNode Argument; + private OperatorImplementation _lastUsed; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); + FindOpAndDetectPostfix(nodes); + int argIndex = IsPostfix ? 0 : 1; + Argument = AddChild(NodeUseType.ValueReadWrite, "Arg", nodes[argIndex]); + BinaryOpSymbol = OpSymbol[0].ToString(); //take a single char out of ++ or -- + var interpContext = (InterpreterAstContext)context; + BinaryOp = interpContext.OperatorHandler.GetOperatorExpressionType(BinaryOpSymbol); + base.AsString = OpSymbol + (IsPostfix ? "(postfix)" : "(prefix)"); + } + + private void FindOpAndDetectPostfix(ParseTreeNodeList mappedNodes) + { + IsPostfix = false; //assume it + OpSymbol = mappedNodes[0].FindTokenAndGetText(); + if (OpSymbol == "--" || OpSymbol == "++") return; + IsPostfix = true; + OpSymbol = mappedNodes[1].FindTokenAndGetText(); + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var oldValue = Argument.Evaluate(thread); + var newValue = thread.Runtime.ExecuteBinaryOperator(BinaryOp, oldValue, 1, ref _lastUsed); + Argument.SetValue(thread, newValue); + var result = IsPostfix ? 
oldValue : newValue; + thread.CurrentNode = Parent; //standard epilog + return result; + } + + public override void SetIsTail() + { + base.SetIsTail(); + Argument.SetIsTail(); + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Expressions/IndexedAccessNode.cs b/src/Irony.Interpreter/Ast/Expressions/IndexedAccessNode.cs new file mode 100644 index 0000000..88a80f7 --- /dev/null +++ b/src/Irony.Interpreter/Ast/Expressions/IndexedAccessNode.cs @@ -0,0 +1,91 @@ +using Sanchime.Irony.Ast; +using System.Collections; +using System.Reflection; + +namespace Sanchime.Irony.Interpreter.Ast.Expressions +{ + public class IndexedAccessNode : AstNode + { + private AstNode _target, _index; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); + _target = AddChild("Target", nodes.First()); + _index = AddChild("Index", nodes.Last()); + AsString = "[" + _index + "]"; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + object result = null; + var targetValue = _target.Evaluate(thread); + if (targetValue == null) + thread.ThrowScriptError("Target object is null."); + var type = targetValue.GetType(); + var indexValue = _index.Evaluate(thread); + //string and array are special cases + if (type == typeof(string)) + { + var sTarget = targetValue as string; + var iIndex = Convert.ToInt32(indexValue); + result = sTarget[iIndex]; + } + else if (type.IsArray) + { + var arr = targetValue as Array; + var iIndex = Convert.ToInt32(indexValue); + result = arr.GetValue(iIndex); + } + else if (targetValue is IDictionary dict) + { + result = dict[indexValue]; + } + else + { + //const BindingFlags flags = BindingFlags.Public | BindingFlags.Instance | BindingFlags.IgnoreCase | BindingFlags.InvokeMethod; + //result = type.InvokeMember("get_Item", flags, null, targetValue, new object[] { indexValue }); + + 
var methodInfo = type.GetTypeInfo().GetDeclaredMethod("get_Item"); + result = methodInfo.Invoke(targetValue, new object[] { indexValue }); //keep the indexer's return value; it was discarded before + } + thread.CurrentNode = Parent; //standard epilog + return result; + } + + public override void DoSetValue(ScriptThread thread, object value) + { + thread.CurrentNode = this; //standard prolog + var targetValue = _target.Evaluate(thread); + if (targetValue == null) + thread.ThrowScriptError("Target object is null."); + var type = targetValue.GetType(); + var indexValue = _index.Evaluate(thread); + //string and array are special cases + if (type == typeof(string)) + { + thread.ThrowScriptError("String is read-only."); + } + else if (type.IsArray) + { + var arr = targetValue as Array; + var iIndex = Convert.ToInt32(indexValue); + arr.SetValue(value, iIndex); + } + else if (targetValue is IDictionary dict) + { + dict[indexValue] = value; + } + else + { + //const BindingFlags flags = BindingFlags.Public | BindingFlags.Instance | BindingFlags.IgnoreCase | BindingFlags.InvokeMethod; + var methodInfo = type.GetTypeInfo().GetDeclaredMethod("set_Item"); + methodInfo.Invoke(targetValue, new object[] { indexValue, value }); + //type.InvokeMember("set_Item", flags, null, targetValue, new object[] { indexValue, value }); + } + thread.CurrentNode = Parent; //standard epilog + }//method + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Expressions/MemberAccessNode.cs b/src/Irony.Interpreter/Ast/Expressions/MemberAccessNode.cs new file mode 100644 index 0000000..20fccc6 --- /dev/null +++ b/src/Irony.Interpreter/Ast/Expressions/MemberAccessNode.cs @@ -0,0 +1,91 @@ +using Sanchime.Irony.Ast; +using System.Reflection; + +namespace Sanchime.Irony.Interpreter.Ast.Expressions +{ + //For now we do not support dotted namespace/type references like System.Collections or System.Collections.List. 
+ // Only references to objects like 'objFoo.Name' or 'objFoo.DoStuff()' + public class MemberAccessNode : AstNode + { + private AstNode _left; + private string _memberName; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); + _left = AddChild("Target", nodes[0]); + var right = nodes[nodes.Count - 1]; + _memberName = right.FindTokenAndGetText(); + ErrorAnchor = right.Span.Location; + AsString = "." + _memberName; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + object result = null; + var leftValue = _left.Evaluate(thread); + if (leftValue == null) + thread.ThrowScriptError("Target object is null."); + var type = leftValue.GetType(); + var members = type.GetMember(_memberName); + if (members == null || members.Length == 0) + thread.ThrowScriptError("Member {0} not found in object of type {1}.", _memberName, type); + var member = members[0]; + switch (member.MemberType) + { + case MemberTypes.Property: + var propInfo = member as PropertyInfo; + result = propInfo.GetValue(leftValue, null); + break; + + case MemberTypes.Field: + var fieldInfo = member as FieldInfo; + result = fieldInfo.GetValue(leftValue); + break; + + case MemberTypes.Method: + result = new ClrMethodBindingTargetInfo(type, _memberName, leftValue); //this bindingInfo works as a call target + break; + + default: + thread.ThrowScriptError("Invalid member type ({0}) for member {1} of type {2}.", member.MemberType, _memberName, type); + result = null; + break; + }//switch + thread.CurrentNode = Parent; //standard epilog + return result; + } + + public override void DoSetValue(ScriptThread thread, object value) + { + thread.CurrentNode = this; //standard prolog + var leftValue = _left.Evaluate(thread); + if (leftValue == null) + thread.ThrowScriptError("Target object is null."); + var type = leftValue.GetType(); + var members = 
type.GetMember(_memberName); + if (members == null || members.Length == 0) + thread.ThrowScriptError("Member {0} not found in object of type {1}.", _memberName, type); + var member = members[0]; + switch (member.MemberType) + { + case MemberTypes.Property: + var propInfo = member as PropertyInfo; + propInfo.SetValue(leftValue, value, null); + break; + + case MemberTypes.Field: + var fieldInfo = member as FieldInfo; + fieldInfo.SetValue(leftValue, value); + break; + + default: + thread.ThrowScriptError("Cannot assign to member {0} of type {1}.", _memberName, type); + break; + }//switch + thread.CurrentNode = Parent; //standard epilog + }//method + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Expressions/UnaryOperationNode.cs b/src/Irony.Interpreter/Ast/Expressions/UnaryOperationNode.cs new file mode 100644 index 0000000..13e8702 --- /dev/null +++ b/src/Irony.Interpreter/Ast/Expressions/UnaryOperationNode.cs @@ -0,0 +1,51 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.Expressions +{ + public class UnaryOperationNode : AstNode + { + public string OpSymbol; + public AstNode Argument; + private OperatorImplementation _lastUsed; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); + OpSymbol = nodes[0].FindTokenAndGetText(); + Argument = AddChild("Arg", nodes[1]); + base.AsString = OpSymbol + "(unary op)"; + var interpContext = (InterpreterAstContext)context; + ExpressionType = interpContext.OperatorHandler.GetUnaryOperatorExpressionType(OpSymbol); + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var arg = Argument.Evaluate(thread); + var result = thread.Runtime.ExecuteUnaryOperator(ExpressionType, arg, ref _lastUsed); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + public override void SetIsTail() + { + base.SetIsTail(); + Argument.SetIsTail(); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Functions/Closure.cs b/src/Irony.Interpreter/Ast/Functions/Closure.cs new file mode 100644 index 0000000..3cbf85b --- /dev/null +++ b/src/Irony.Interpreter/Ast/Functions/Closure.cs @@ -0,0 +1,28 @@ +using Sanchime.Irony.Interpreter.Scopes; + +namespace Sanchime.Irony.Interpreter.Ast.Functions +{ + public class Closure : ICallTarget + { + //The scope that created closure; is used to find Parents (enclosing scopes) + public Scope ParentScope; + + public LambdaNode Lamda; + + public Closure(Scope parentScope, LambdaNode targetNode) + { + ParentScope = parentScope; + Lamda = targetNode; + } + + public object Call(ScriptThread thread, object[] parameters) + { + return Lamda.Call(ParentScope, thread, parameters); + } + + public override 
string ToString() + { + return Lamda.ToString(); //returns nice string like "" + } + } //class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Functions/FunctionCallNode.cs b/src/Irony.Interpreter/Ast/Functions/FunctionCallNode.cs new file mode 100644 index 0000000..5ae1b1d --- /dev/null +++ b/src/Irony.Interpreter/Ast/Functions/FunctionCallNode.cs @@ -0,0 +1,139 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using Sanchime.Irony.Interpreter.Utilities; + +namespace Sanchime.Irony.Interpreter.Ast.Functions +{ + //A node representing function call. 
Also handles Special Forms + public class FunctionCallNode : AstNode + { + private AstNode TargetRef; + private AstNode Arguments; + private string _targetName; + private SpecialForm _specialForm; + private AstNode[] _specialFormArgs; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); + TargetRef = AddChild("Target", nodes[0]); + TargetRef.UseType = NodeUseType.CallTarget; + _targetName = nodes[0].FindTokenAndGetText(); + Arguments = AddChild("Args", nodes[1]); + AsString = "Call " + _targetName; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + SetupEvaluateMethod(thread); + var result = Evaluate(thread); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + private void SetupEvaluateMethod(ScriptThread thread) + { + var languageTailRecursive = thread.Runtime.Language.Grammar.LanguageFlags.IsSet(LanguageFlags.TailRecursive); + lock (LockObject) + { + var target = TargetRef.Evaluate(thread); + if (target is SpecialForm) + { + _specialForm = target as SpecialForm; + _specialFormArgs = Arguments.ChildNodes.ToArray(); + Evaluate = EvaluateSpecialForm; + } + else + { + if (languageTailRecursive) + { + var isTail = Flags.IsSet(AstNodeFlags.IsTail); + if (isTail) + Evaluate = EvaluateTail; + else + Evaluate = EvaluateWithTailCheck; + } + else + Evaluate = EvaluateNoTail; + } + }//lock + } + + // Evaluation for special forms + private object EvaluateSpecialForm(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var result = _specialForm(thread, _specialFormArgs); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + // Evaluation for non-tail languages + private object EvaluateNoTail(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var target = TargetRef.Evaluate(thread); + var iCall = target as ICallTarget; + if 
(iCall == null) + thread.ThrowScriptError(Resources.ErrVarIsNotCallable, _targetName); + var args = (object[])Arguments.Evaluate(thread); + object result = iCall.Call(thread, args); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + //Evaluation for tailed languages + private object EvaluateTail(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var target = TargetRef.Evaluate(thread); + var iCall = target as ICallTarget; + if (iCall == null) + thread.ThrowScriptError(Resources.ErrVarIsNotCallable, _targetName); + var args = (object[])Arguments.Evaluate(thread); + thread.Tail = iCall; + thread.TailArgs = args; + thread.CurrentNode = Parent; //standard epilog + return null; + } + + private object EvaluateWithTailCheck(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var target = TargetRef.Evaluate(thread); + var iCall = target as ICallTarget; + if (iCall == null) + thread.ThrowScriptError(Resources.ErrVarIsNotCallable, _targetName); + var args = (object[])Arguments.Evaluate(thread); + object result = null; + result = iCall.Call(thread, args); + //Note that after invoking tail we can get another tail. + // So we need to keep calling tails while they are there. 
+ while (thread.Tail != null) + { + var tail = thread.Tail; + var tailArgs = thread.TailArgs; + thread.Tail = null; + thread.TailArgs = null; + result = tail.Call(thread, tailArgs); + } + thread.CurrentNode = Parent; //standard epilog + return result; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Functions/FunctionDefNode.cs b/src/Irony.Interpreter/Ast/Functions/FunctionDefNode.cs new file mode 100644 index 0000000..d723f7b --- /dev/null +++ b/src/Irony.Interpreter/Ast/Functions/FunctionDefNode.cs @@ -0,0 +1,56 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.Functions +{ + //A node representing function definition (named lambda) + public class FunctionDefNode : AstNode + { + public AstNode NameNode; + public LambdaNode Lambda; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + //child #0 is usually a keyword like "def" + var nodes = treeNode.GetMappedChildNodes(); + NameNode = AddChild("Name", nodes[1]); + Lambda = new LambdaNode(context, treeNode, nodes[2], nodes[3]) + { + Parent = this + }; //node, params, body + AsString = ""; + //Lamda will set treeNode.AstNode to itself, we need to set it back to "this" here + treeNode.AstNode = this; // + } + + public override void Reset() + { + DependentScopeInfo = null; + Lambda.Reset(); + base.Reset(); + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var closure = Lambda.Evaluate(thread); //returns closure + NameNode.SetValue(thread, closure); + thread.CurrentNode = Parent; //standard epilog + return closure; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Functions/LambdaNode.cs b/src/Irony.Interpreter/Ast/Functions/LambdaNode.cs new file mode 100644 index 0000000..860b19d --- /dev/null +++ b/src/Irony.Interpreter/Ast/Functions/LambdaNode.cs @@ -0,0 +1,103 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. 
+ * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using Sanchime.Irony.Interpreter.Scopes; + +namespace Sanchime.Irony.Interpreter.Ast.Functions +{ + //A node representing an anonymous function + public class LambdaNode : AstNode + { + public AstNode Parameters; + public AstNode Body; + + public LambdaNode() + { } + + //Used by FunctionDefNode + public LambdaNode(AstContext context, ParseTreeNode node, ParseTreeNode parameters, ParseTreeNode body) + { + InitImpl(context, node, parameters, body); + } + + public override void Init(AstContext context, ParseTreeNode parseNode) + { + var mappedNodes = parseNode.GetMappedChildNodes(); + InitImpl(context, parseNode, mappedNodes[0], mappedNodes[1]); + } + + private void InitImpl(AstContext context, ParseTreeNode parseNode, ParseTreeNode parametersNode, ParseTreeNode bodyNode) + { + base.Init(context, parseNode); + Parameters = AddChild("Parameters", parametersNode); + Body = AddChild("Body", bodyNode); + AsString = "Lambda[" + Parameters.ChildNodes.Count + "]"; + Body.SetIsTail(); //this will be propagated to the last statement + } + + public override void Reset() + { + DependentScopeInfo = null; + base.Reset(); + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + lock (LockObject) + { + if (DependentScopeInfo == null) + { + var langCaseSensitive = thread.App.Language.Grammar.CaseSensitive; + DependentScopeInfo = new ScopeInfo(this, langCaseSensitive); + } + // In the first evaluation the parameter list will add parameter's SlotInfo objects to Scope.ScopeInfo + thread.PushScope(DependentScopeInfo, null); + Parameters.Evaluate(thread); + thread.PopScope(); + //Set Evaluate method and invoke it later + Evaluate = EvaluateAfter; + } + var result = Evaluate(thread); + thread.CurrentNode = Parent; //standard epilog + return 
result; + } + + private object EvaluateAfter(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var closure = new Closure(thread.CurrentScope, this); + thread.CurrentNode = Parent; //standard epilog + return closure; + } + + public object Call(Scope creatorScope, ScriptThread thread, object[] parameters) + { + var save = thread.CurrentNode; //prolog, not standard - the caller is NOT target node's parent + thread.CurrentNode = this; + thread.PushClosureScope(DependentScopeInfo, creatorScope, parameters); + Parameters.Evaluate(thread); // pre-process parameters + var result = Body.Evaluate(thread); + thread.PopScope(); + thread.CurrentNode = save; //epilog, restoring caller + return result; + } + + public override void SetIsTail() + { + //ignore this call, do not mark this node as tail, it is meaningless + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Functions/ParamListNode.cs b/src/Irony.Interpreter/Ast/Functions/ParamListNode.cs new file mode 100644 index 0000000..b3a585f --- /dev/null +++ b/src/Irony.Interpreter/Ast/Functions/ParamListNode.cs @@ -0,0 +1,57 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using Sanchime.Irony.Interpreter.Ast.PrimitiveNodes; +using Sanchime.Irony.Interpreter.Scopes; + +namespace Sanchime.Irony.Interpreter.Ast.Functions +{ + public class ParamListNode : AstNode + { + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + foreach (var child in treeNode.ChildNodes) + { + AddChild(NodeUseType.Parameter, "param", child); + } + + AsString = "param_list[" + ChildNodes.Count + "]"; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + // Is called once, at first evaluation of FunctionDefNode + // Creates parameter slots + foreach (var child in ChildNodes) + { + if (child is IdentifierNode idNode) + { + thread.CurrentScope.Info.AddSlot(idNode.Symbol, SlotType.Parameter); + } + } + Evaluate = EvaluateAfter; + thread.CurrentNode = Parent; //standard epilog + return null; + }//method + + // TODO: implement handling list/dict parameter tails (Scheme, Python, etc) + private object EvaluateAfter(ScriptThread thread) + { + return null; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/PrimitiveNodes/IdentifierNode.cs b/src/Irony.Interpreter/Ast/PrimitiveNodes/IdentifierNode.cs new file mode 100644 index 0000000..37469a5 --- /dev/null +++ b/src/Irony.Interpreter/Ast/PrimitiveNodes/IdentifierNode.cs @@ -0,0 +1,56 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. 
+ * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.PrimitiveNodes +{ + public class IdentifierNode : AstNode + { + public string Symbol; + private Binding _accessor; + + public IdentifierNode() + { } + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + Symbol = treeNode.Token.ValueString; + AsString = Symbol; + } + + //Executed only once, on the first call + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + _accessor = thread.Bind(Symbol, BindingRequestFlags.Read); + Evaluate = _accessor.GetValueRef; // Optimization - directly set method ref to accessor's method. EvaluateReader; + var result = Evaluate(thread); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + public override void DoSetValue(ScriptThread thread, object value) + { + thread.CurrentNode = this; //standard prolog + if (_accessor == null) + { + _accessor = thread.Bind(Symbol, BindingRequestFlags.Write | BindingRequestFlags.ExistingOrNew); + } + _accessor.SetValueRef(thread, value); + thread.CurrentNode = Parent; //standard epilog + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/PrimitiveNodes/LiteralValueNode.cs b/src/Irony.Interpreter/Ast/PrimitiveNodes/LiteralValueNode.cs new file mode 100644 index 0000000..b4614ad --- /dev/null +++ b/src/Irony.Interpreter/Ast/PrimitiveNodes/LiteralValueNode.cs @@ -0,0 +1,28 @@ +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.PrimitiveNodes +{ + public class LiteralValueNode : AstNode + { + public object Value; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + Value = treeNode.Token.Value; + AsString = Value == null ? 
"null" : Value.ToString(); + if (Value is string) + AsString = "\"" + AsString + "\""; + } + + protected override object DoEvaluate(ScriptThread thread) + { + return Value; + } + + public override bool IsConstant() + { + return true; + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/PrimitiveNodes/StringTemplateNode.cs b/src/Irony.Interpreter/Ast/PrimitiveNodes/StringTemplateNode.cs new file mode 100644 index 0000000..7f457e3 --- /dev/null +++ b/src/Irony.Interpreter/Ast/PrimitiveNodes/StringTemplateNode.cs @@ -0,0 +1,184 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.PrimitiveNodes +{ + // Implements Ruby-like active strings with embedded expressions + + /* Example of use: + + //String literal with embedded expressions ------------------------------------------------------------------ + var stringLit = new StringLiteral("string", "\"", StringOptions.AllowsAllEscapes | StringOptions.IsTemplate); + stringLit.AstNodeType = typeof(StringTemplateNode); + var Expr = new NonTerminal("Expr"); + var templateSettings = new StringTemplateSettings(); //by default set to Ruby-style settings + templateSettings.ExpressionRoot = Expr; //this defines how to evaluate expressions inside template + this.SnippetRoots.Add(Expr); + stringLit.AstNodeConfig = templateSettings; + + //define Expr as an expression non-terminal in your grammar + + */ + + public class StringTemplateNode : AstNode + { + #region embedded classes + + private enum SegmentType + { + Text, + Expression + } + + private class TemplateSegment + { + public SegmentType Type; + public string Text; + public AstNode ExpressionNode; + public int Position; //Position in raw text of the token for error reporting + + public TemplateSegment(string text, AstNode node, int position) + { + Type = node == null ? 
SegmentType.Text : SegmentType.Expression; + Text = text; + ExpressionNode = node; + Position = position; + } + } + + private class SegmentList : List<TemplateSegment> + { } + + #endregion + + private string _template; + private string _tokenText; //used for locating error + private StringTemplateSettings _templateSettings; //copied from Terminal.AstNodeConfig + private SegmentList _segments = new SegmentList(); + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + _template = treeNode.Token.ValueString; + _tokenText = treeNode.Token.Text; + _templateSettings = treeNode.Term.AstConfig.Data as StringTemplateSettings; + ParseSegments(context); + AsString = "\"" + _template + "\" (templated string)"; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var value = BuildString(thread); + thread.CurrentNode = Parent; //standard epilog + return value; + } + + private void ParseSegments(AstContext context) + { + var exprParser = new Parser(context.Language, _templateSettings.ExpressionRoot); + // As we go along the "value text" (that has all escapes done), we track the position in raw token text in the variable exprPosInTokenText. + // This position is position in original text in source code, including original escaping sequences and open/close quotes. + // It will be passed to segment constructor, and maybe used later to compute the exact position of runtime error when it occurs. 
+ int currentPos = 0, exprPosInTokenText = 0; + while (true) + { + var startTagPos = _template.IndexOf(_templateSettings.StartTag, currentPos); + if (startTagPos < 0) startTagPos = _template.Length; + var text = _template.Substring(currentPos, startTagPos - currentPos); + if (!string.IsNullOrEmpty(text)) + _segments.Add(new TemplateSegment(text, null, 0)); //for text segments position is not used + if (startTagPos >= _template.Length) + break; //from while + //We have a real start tag, grab the expression + currentPos = startTagPos + _templateSettings.StartTag.Length; + var endTagPos = _template.IndexOf(_templateSettings.EndTag, currentPos); + if (endTagPos < 0) + { + //"No ending tag '{0}' found in embedded expression." + context.AddMessage(ErrorLevel.Error, Location, Resources.ErrNoEndTagInEmbExpr, _templateSettings.EndTag); + return; + } + var exprText = _template.Substring(currentPos, endTagPos - currentPos); + if (!string.IsNullOrEmpty(exprText)) + { + //parse the expression + //_expressionParser.context.Reset(); + + var exprTree = exprParser.Parse(exprText); + if (exprTree.HasErrors()) + { + //we use original search in token text instead of currentPos in template to avoid distortions caused by opening quote and escaped sequences + var baseLocation = Location + _tokenText.IndexOf(exprText); + CopyMessages(exprTree.ParserMessages, context.Messages, baseLocation, Resources.ErrInvalidEmbeddedPrefix); + return; + } + //add the expression segment + exprPosInTokenText = _tokenText.IndexOf(_templateSettings.StartTag, exprPosInTokenText) + _templateSettings.StartTag.Length; + var segmNode = exprTree.Root.AstNode as AstNode; + segmNode.Parent = this; //important to attach the segm node to current Module + _segments.Add(new TemplateSegment(null, segmNode, exprPosInTokenText)); + //advance position beyond the expression + exprPosInTokenText += exprText.Length + _templateSettings.EndTag.Length; + }//if + currentPos = endTagPos + _templateSettings.EndTag.Length; + }//while 
+ } + + private void CopyMessages(LogMessageList fromList, LogMessageList toList, SourceLocation baseLocation, string messagePrefix) + { + foreach (var other in fromList) + toList.Add(new LogMessage(other.Level, baseLocation + other.Location, messagePrefix + other.Message, other.ParserState)); + }// + + private object BuildString(ScriptThread thread) + { + string[] values = new string[_segments.Count]; + for (int i = 0; i < _segments.Count; i++) + { + var segment = _segments[i]; + switch (segment.Type) + { + case SegmentType.Text: + values[i] = segment.Text; + break; + + case SegmentType.Expression: + values[i] = EvaluateExpression(thread, segment); + break; + }//else + }//for i + var result = string.Join(string.Empty, values); + return result; + }//method + + private string EvaluateExpression(ScriptThread thread, TemplateSegment segment) + { + try + { + var value = segment.ExpressionNode.Evaluate(thread); + return value == null ? string.Empty : value.ToString(); + } + catch + { + //We need to catch here and set current node; ExpressionNode may have reset it, and location would be wrong + //TODO: fix this - set error location to exact location inside string. + thread.CurrentNode = this; + throw; + } + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/SpecialNodes/EmptyStatementNode.cs b/src/Irony.Interpreter/Ast/SpecialNodes/EmptyStatementNode.cs new file mode 100644 index 0000000..c61fc70 --- /dev/null +++ b/src/Irony.Interpreter/Ast/SpecialNodes/EmptyStatementNode.cs @@ -0,0 +1,7 @@ +namespace Sanchime.Irony.Interpreter.Ast.SpecialNodes +{ + //A statement that does nothing, like "pass" command in Python. 
+ public class EmptyStatementNode : AstNode + { + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/SpecialNodes/NotSupportedNode.cs b/src/Irony.Interpreter/Ast/SpecialNodes/NotSupportedNode.cs new file mode 100644 index 0000000..e9e4af4 --- /dev/null +++ b/src/Irony.Interpreter/Ast/SpecialNodes/NotSupportedNode.cs @@ -0,0 +1,25 @@ +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.SpecialNodes +{ + //A substitute node to use on constructs that are not yet supported by language implementation. + // The script would compile Ok but on attempt to evaluate the node would throw a runtime exception + public class NotSupportedNode : AstNode + { + private string Name; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + Name = treeNode.Term.ToString(); + AsString = Name + " (not supported)"; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + thread.ThrowScriptError(Resources.ErrConstructNotSupported, Name); + return null; //never happens + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/SpecialNodes/NullNode.cs b/src/Irony.Interpreter/Ast/SpecialNodes/NullNode.cs new file mode 100644 index 0000000..d2b9e24 --- /dev/null +++ b/src/Irony.Interpreter/Ast/SpecialNodes/NullNode.cs @@ -0,0 +1,19 @@ +namespace Sanchime.Irony.Interpreter.Ast.SpecialNodes +{ + //A stub to use when AST node was not created (type not specified on NonTerminal, or error on creation) + // The purpose of the stub is to throw a meaningful message when interpreter tries to evaluate null node. 
+ public class NullNode : AstNode + { + public NullNode(BnfTerm term) + { + Term = term; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + thread.ThrowScriptError(Resources.ErrNullNodeEval, Term); + return null; //never happens + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Statements/AssignmentNode.cs b/src/Irony.Interpreter/Ast/Statements/AssignmentNode.cs new file mode 100644 index 0000000..323fc9f --- /dev/null +++ b/src/Irony.Interpreter/Ast/Statements/AssignmentNode.cs @@ -0,0 +1,114 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using System.Linq.Expressions; + +namespace Sanchime.Irony.Interpreter.Ast.Statements +{ + public class AssignmentNode : AstNode + { + public AstNode Target; + public string AssignmentOp; + public bool IsAugmented; // true if it is augmented operation like "+=" + public ExpressionType BinaryExpressionType; + public AstNode Expression; + private OperatorImplementation _lastUsed; + private int _failureCount; + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); + Target = AddChild(NodeUseType.ValueWrite, "To", nodes[0]); + //Get Op and baseOp if it is combined assignment + AssignmentOp = nodes[1].FindTokenAndGetText(); + if (string.IsNullOrEmpty(AssignmentOp)) + AssignmentOp = "="; + BinaryExpressionType = CustomExpressionTypes.NotAnExpression; + //There maybe an "=" sign in the middle, or not - if it is marked as punctuation; so we just take the last node in child list + Expression = AddChild(NodeUseType.ValueRead, "Expr", nodes[nodes.Count - 1]); + AsString = AssignmentOp + " (assignment)"; + // TODO: this is not always correct: in Pascal the assignment operator is :=. 
+ IsAugmented = AssignmentOp.Length > 1; + if (IsAugmented) + { + var ictxt = context as InterpreterAstContext; + ExpressionType = ictxt.OperatorHandler.GetOperatorExpressionType(AssignmentOp); + BinaryExpressionType = ictxt.OperatorHandler.GetBinaryOperatorForAugmented(ExpressionType); + Target.UseType = NodeUseType.ValueReadWrite; + } + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + if (IsAugmented) + Evaluate = EvaluateAugmentedFast; + else + Evaluate = EvaluateSimple; //non-augmented + //call self-evaluate again, now to call real methods + var result = Evaluate(thread); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + private object EvaluateSimple(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var value = Expression.Evaluate(thread); + Target.SetValue(thread, value); + thread.CurrentNode = Parent; //standard epilog + return value; + } + + private object EvaluateAugmentedFast(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var value = Target.Evaluate(thread); + var exprValue = Expression.Evaluate(thread); + object result = null; + if (_lastUsed != null) + { + try + { + result = _lastUsed.EvaluateBinary(value, exprValue); + } + catch + { + _failureCount++; + // if failed 3 times, change to method without direct try + if (_failureCount > 3) + Evaluate = EvaluateAugmented; + } //catch + }// if _lastUsed + if (result == null) + result = thread.Runtime.ExecuteBinaryOperator(BinaryExpressionType, value, exprValue, ref _lastUsed); + Target.SetValue(thread, result); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + private object EvaluateAugmented(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + var value = Target.Evaluate(thread); + var exprValue = Expression.Evaluate(thread); + var result = thread.Runtime.ExecuteBinaryOperator(BinaryExpressionType, value, exprValue, ref 
_lastUsed); + Target.SetValue(thread, result); + thread.CurrentNode = Parent; //standard epilog + return result; + } + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Ast/Statements/StatementListNode.cs b/src/Irony.Interpreter/Ast/Statements/StatementListNode.cs new file mode 100644 index 0000000..d630a34 --- /dev/null +++ b/src/Irony.Interpreter/Ast/Statements/StatementListNode.cs @@ -0,0 +1,101 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Interpreter.Ast.Statements +{ + public class StatementListNode : AstNode + { + private AstNode _singleChild; //stores a single child when child count == 1, for fast access + + public override void Init(AstContext context, ParseTreeNode treeNode) + { + base.Init(context, treeNode); + var nodes = treeNode.GetMappedChildNodes(); + foreach (var child in nodes) + { + //don't add if it is null; it can happen that "statement" is a comment line and statement's node is null. 
+ // So to make life easier for language creator, we just skip if it is null + if (child.AstNode != null) + AddChild(string.Empty, child); + } + AsString = "Statement List"; + if (ChildNodes.Count == 0) + { + AsString += " (Empty)"; + } + else + ChildNodes[ChildNodes.Count - 1].Flags |= AstNodeFlags.IsTail; + } + + protected override object DoEvaluate(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + lock (LockObject) + { + switch (ChildNodes.Count) + { + case 0: + Evaluate = EvaluateEmpty; + break; + + case 1: + _singleChild = ChildNodes[0]; + Evaluate = EvaluateOne; + break; + + default: + Evaluate = EvaluateMultiple; + break; + }//switch + }//lock + var result = Evaluate(thread); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + private object EvaluateEmpty(ScriptThread thread) + { + return null; + } + + private object EvaluateOne(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + object result = _singleChild.Evaluate(thread); + thread.CurrentNode = Parent; //standard epilog + return result; + } + + private object EvaluateMultiple(ScriptThread thread) + { + thread.CurrentNode = this; //standard prolog + object result = null; + for (int i = 0; i < ChildNodes.Count; i++) + { + result = ChildNodes[i].Evaluate(thread); + } + thread.CurrentNode = Parent; //standard epilog + return result; //return result of last statement + } + + public override void SetIsTail() + { + base.SetIsTail(); + if (ChildNodes.Count > 0) + ChildNodes[ChildNodes.Count - 1].SetIsTail(); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/Binding.cs b/src/Irony.Interpreter/Bindings/Binding.cs new file mode 100644 index 0000000..cefd9cd --- /dev/null +++ b/src/Irony.Interpreter/Bindings/Binding.cs @@ -0,0 +1,61 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is 
subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Bindings +{ + // Binding is a link between a variable in the script (for ex, IdentifierNode) and a value storage - + // a slot in local or module-level Scope. Binding to internal variables is supported by SlotBinding class. + // Alternatively a symbol can be bound to external CLR entity in imported namespace - class, function, property, etc. + // Binding is produced by Runtime.Bind method and allows read/write operations through GetValueRef and SetValueRef methods. + public class Binding + { + public readonly BindingTargetInfo TargetInfo; + public EvaluateMethod GetValueRef; // ref to Getter method implementation + public ValueSetterMethod SetValueRef; // ref to Setter method implementation + public bool IsConstant { get; protected set; } + + public Binding(BindingTargetInfo targetInfo) + { + TargetInfo = targetInfo; + } + + public Binding(string symbol, BindingTargetType targetType) + { + TargetInfo = new BindingTargetInfo(symbol, targetType); + } + + public override string ToString() + { + return "{Binding to + " + TargetInfo.ToString() + "}"; + } + }//class + + //Binding to a "fixed", constant value + public class ConstantBinding : Binding + { + public object Target; + + public ConstantBinding(object target, BindingTargetInfo targetInfo) : base(targetInfo) + { + Target = target; + GetValueRef = GetValue; + IsConstant = true; + } + + public object GetValue(ScriptThread thread) + { + return Target; + } + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/BindingRequest.cs 
b/src/Irony.Interpreter/Bindings/BindingRequest.cs new file mode 100644 index 0000000..20b2bff --- /dev/null +++ b/src/Irony.Interpreter/Bindings/BindingRequest.cs @@ -0,0 +1,52 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Interpreter.Scopes; + +namespace Sanchime.Irony.Interpreter.Bindings +{ + [Flags] + public enum BindingRequestFlags + { + Read = 0x01, + Write = 0x02, + Invoke = 0x04, + ExistingOrNew = 0x10, + NewOnly = 0x20, // for new variable, for ex, in JavaScript "var x..." - introduces x as new variable + } + + //Binding request is a container for information about requested binding. Binding request goes from an Ast node to language runtime. + // For example, identifier node would request a binding for an identifier. 
+ public class BindingRequest + { + public ScriptThread Thread; + public AstNode FromNode; + public ModuleInfo FromModule; + public BindingRequestFlags Flags; + public string Symbol; + public ScopeInfo FromScopeInfo; + public bool IgnoreCase; + + public BindingRequest(ScriptThread thread, AstNode fromNode, string symbol, BindingRequestFlags flags) + { + Thread = thread; + FromNode = fromNode; + FromModule = thread.App.DataMap.GetModule(fromNode.ModuleNode); + Symbol = symbol; + Flags = flags; + FromScopeInfo = thread.CurrentScope.Info; + IgnoreCase = !thread.Runtime.Language.Grammar.CaseSensitive; + } + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/BindingTargetInfo.cs b/src/Irony.Interpreter/Bindings/BindingTargetInfo.cs new file mode 100644 index 0000000..d8d03e5 --- /dev/null +++ b/src/Irony.Interpreter/Bindings/BindingTargetInfo.cs @@ -0,0 +1,42 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Bindings +{ + public enum BindingTargetType + { + Slot, + BuiltInObject, + SpecialForm, + ClrInterop, + Custom, // any special non-standard type for specific language + } + + public class BindingTargetInfo + { + public readonly string Symbol; + public readonly BindingTargetType Type; + + public BindingTargetInfo(string symbol, BindingTargetType type) + { + Symbol = symbol; + Type = type; + } + + public override string ToString() + { + return Symbol + "/" + Type.ToString(); + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/BuiltInObjectBinding.cs b/src/Irony.Interpreter/Bindings/BuiltInObjectBinding.cs new file mode 100644 index 0000000..f2fb3bf --- /dev/null +++ b/src/Irony.Interpreter/Bindings/BuiltInObjectBinding.cs @@ -0,0 +1,87 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter +{ + // A general delegate representing a built-in method implementation. 
+ public delegate object BuiltInMethod(ScriptThread thread, object[] args); + + //A wrapper to convert BuiltInMethod delegate (referencing some custom method in LanguageRuntime) into an ICallTarget instance (expected by FunctionCallNode) + public class BuiltInCallTarget : ICallTarget + { + public string Name; + public readonly BuiltInMethod Method; + public readonly int MinParamCount, MaxParamCount; + public string[] ParameterNames; //Just for information purpose + + public BuiltInCallTarget(BuiltInMethod method, string name, int minParamCount = 0, int maxParamCount = 0, string parameterNames = null) + { + Method = method; + Name = name; + MinParamCount = minParamCount; + MaxParamCount = Math.Max(MinParamCount, maxParamCount); + if (!string.IsNullOrEmpty(parameterNames)) + ParameterNames = parameterNames.Split(','); + } + + #region ICallTarget Members + + public object Call(ScriptThread thread, object[] parameters) + { + return Method(thread, parameters); + } + + #endregion + } + + // The class contains information about built-in function. It has double purpose. + // First, it is used as a BindingTargetInfo instance (meta-data) for a binding to a built-in function. + // Second, we use it as a reference to a custom built-in method that we store in LanguageRuntime.BuiltIns table. + // For this, we make it implement IBindingSource - we can add it to BuiltIns table of LanguageRuntime, which is a table of IBindingSource instances. + // Being IBindingSource, it can produce a binding object to the target method - singleton in fact; + // the same binding object is used for all calls to the method from all function-call AST nodes. + public class BuiltInCallableTargetInfo : BindingTargetInfo, IBindingSource + { + public Binding BindingInstance; //A singleton binding instance; we share it for all AST nodes (function call nodes) that call the method. 
+ + public BuiltInCallableTargetInfo(BuiltInMethod method, string methodName, int minParamCount = 0, int maxParamCount = 0, string parameterNames = null) : + this(new BuiltInCallTarget(method, methodName, minParamCount, maxParamCount, parameterNames)) + { + } + + public BuiltInCallableTargetInfo(BuiltInCallTarget target) : base(target.Name, BindingTargetType.BuiltInObject) + { + BindingInstance = new ConstantBinding(target, this); + } + + //Implement IBindingSource.Bind + public Binding Bind(BindingRequest request) + { + return BindingInstance; + } + }//class + + // Method for adding methods to BuiltIns table in Runtime + public static partial class BindingSourceTableExtensions + { + public static BindingTargetInfo AddMethod(this BindingSourceTable targets, BuiltInMethod method, string methodName, + int minParamCount = 0, int maxParamCount = 0, string parameterNames = null) + { + var callTarget = new BuiltInCallTarget(method, methodName, minParamCount, maxParamCount, parameterNames); + var targetInfo = new BuiltInCallableTargetInfo(callTarget); + targets.Add(methodName, targetInfo); + return targetInfo; + } + } +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/ClrInteropBindings.cs b/src/Irony.Interpreter/Bindings/ClrInteropBindings.cs new file mode 100644 index 0000000..a9da58c --- /dev/null +++ b/src/Irony.Interpreter/Bindings/ClrInteropBindings.cs @@ -0,0 +1,210 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using System.Reflection; + +namespace Sanchime.Irony.Interpreter.Bindings +{ + //Unfinished, work in progress, file disabled for now + + public enum ClrTargetType + { + Namespace, + Type, + Method, + Property, + Field, + } + + public class ClrInteropBindingTargetInfo : BindingTargetInfo, IBindingSource + { + public ClrTargetType TargetSubType; + + public ClrInteropBindingTargetInfo(string symbol, ClrTargetType targetSubType) : base(symbol, BindingTargetType.ClrInterop) + { + TargetSubType = targetSubType; + } + + public virtual Binding Bind(BindingRequest request) + { + throw new NotImplementedException(); + } + }//class + + public class ClrNamespaceBindingTargetInfo : ClrInteropBindingTargetInfo + { + private ConstantBinding _binding; + + public ClrNamespaceBindingTargetInfo(string ns) : base(ns, ClrTargetType.Namespace) + { + _binding = new ConstantBinding(ns, this); + } + + public override Binding Bind(BindingRequest request) + { + return _binding; + } + } + + public class ClrTypeBindingTargetInfo : ClrInteropBindingTargetInfo + { + private ConstantBinding _binding; + + public ClrTypeBindingTargetInfo(Type type) : base(type.Name, ClrTargetType.Type) + { + _binding = new ConstantBinding(type, this); + } + + public override Binding Bind(BindingRequest request) + { + return _binding; + } + } + + public class ClrMethodBindingTargetInfo : ClrInteropBindingTargetInfo, ICallTarget + { //The object works as ICallTarget itself + public object Instance; + public Type DeclaringType; + private BindingFlags _invokeFlags; + private Binding _binding; + + public ClrMethodBindingTargetInfo(Type declaringType, string methodName, object instance = null) : base(methodName, ClrTargetType.Method) + { + DeclaringType = declaringType; + Instance = instance; + _invokeFlags = BindingFlags.InvokeMethod | BindingFlags.Public | BindingFlags.NonPublic; + if (Instance == null) + 
_invokeFlags |= BindingFlags.Static; + else + _invokeFlags |= BindingFlags.Instance; + _binding = new ConstantBinding(target: this as ICallTarget, targetInfo: this); + //The object works as CallTarget itself; the "as" conversion is not needed in fact, we do it just to underline the role + } + + public override Binding Bind(BindingRequest request) + { + return _binding; + } + + #region ICalllable.Call implementation + + public object Call(ScriptThread thread, object[] args) + { + // TODO: fix this. Currently doing it slow but easy way, through reflection + if (args != null && args.Length == 0) + args = null; + var memberInfo = DeclaringType.GetTypeInfo().GetMethod(base.Symbol, _invokeFlags); + object result = null; + if (memberInfo != null) + { + result = memberInfo.Invoke(Instance, args); + } + //var result = DeclaringType.InvokeMember(base.Symbol, _invokeFlags, null, Instance, args); + return result; + } + + #endregion + } + + public class ClrPropertyBindingTargetInfo : ClrInteropBindingTargetInfo + { + public object Instance; + public PropertyInfo Property; + private Binding _binding; + + public ClrPropertyBindingTargetInfo(PropertyInfo property, object instance) : base(property.Name, ClrTargetType.Property) + { + Property = property; + Instance = instance; + _binding = new Binding(this); + _binding.GetValueRef = GetPropertyValue; + _binding.SetValueRef = SetPropertyValue; + } + + public override Binding Bind(BindingRequest request) + { + return _binding; + } + + private object GetPropertyValue(ScriptThread thread) + { + var result = Property.GetValue(Instance, null); + return result; + } + + private void SetPropertyValue(ScriptThread thread, object value) + { + Property.SetValue(Instance, value, null); + } + } + + public class ClrFieldBindingTargetInfo : ClrInteropBindingTargetInfo + { + public object Instance; + public FieldInfo Field; + private Binding _binding; + + public ClrFieldBindingTargetInfo(FieldInfo field, object instance) : base(field.Name, 
ClrTargetType.Field) + { + Field = field; + Instance = instance; + _binding = new Binding(this); + _binding.GetValueRef = GetPropertyValue; + _binding.SetValueRef = SetPropertyValue; + } + + public override Binding Bind(BindingRequest request) + { + return _binding; + } + + private object GetPropertyValue(ScriptThread thread) + { + var result = Field.GetValue(Instance); + return result; + } + + private void SetPropertyValue(ScriptThread thread, object value) + { + Field.SetValue(Instance, value); + } + } + + // Method for adding methods to BuiltIns table in Runtime + public static partial class BindingSourceTableExtensions + { + public static void ImportStaticMembers(this BindingSourceTable targets, Type fromType) + { + var members = fromType.GetMembers(BindingFlags.Public | BindingFlags.Static); + foreach (var member in members) + { + if (targets.ContainsKey(member.Name)) continue; //do not import overloaded methods several times + switch (member.MemberType) + { + case MemberTypes.Method: + targets.Add(member.Name, new ClrMethodBindingTargetInfo(fromType, member.Name)); + break; + + case MemberTypes.Property: + targets.Add(member.Name, new ClrPropertyBindingTargetInfo(member as PropertyInfo, null)); + break; + + case MemberTypes.Field: + targets.Add(member.Name, new ClrFieldBindingTargetInfo(member as FieldInfo, null)); + break; + }//switch + }//foreach + }//method + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/IBindingSource.cs b/src/Irony.Interpreter/Bindings/IBindingSource.cs new file mode 100644 index 0000000..7d066f7 --- /dev/null +++ b/src/Irony.Interpreter/Bindings/IBindingSource.cs @@ -0,0 +1,47 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. 
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Bindings +{ + public interface IBindingSource + { + Binding Bind(BindingRequest request); + } + + public class BindingSourceList : List + { + } + + public class BindingSourceTable : Dictionary, IBindingSource + { + public BindingSourceTable(bool caseSensitive) + : base(caseSensitive ? StringComparer.Ordinal : StringComparer.OrdinalIgnoreCase) + { + } + + //IBindingSource Members + public Binding Bind(BindingRequest request) + { + IBindingSource target; + if (TryGetValue(request.Symbol, out target)) + return target.Bind(request); + return null; + } + }//class + + // This class will be used to define extensions for BindingSourceTable + public static partial class BindingSourceTableExtensions + { + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/ModuleExport.cs b/src/Irony.Interpreter/Bindings/ModuleExport.cs new file mode 100644 index 0000000..8f7127c --- /dev/null +++ b/src/Irony.Interpreter/Bindings/ModuleExport.cs @@ -0,0 +1,35 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Interpreter.Scopes; + +namespace Sanchime.Irony.Interpreter.Bindings +{ + // Module export, container for public, exported symbols from module + // Just a skeleton, to be completed + public class ModuleExport : IBindingSource + { + public ModuleInfo Module; + + public ModuleExport(ModuleInfo module) + { + Module = module; + } + + public Binding Bind(BindingRequest request) + { + return null; + } + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/SlotBinding.cs b/src/Irony.Interpreter/Bindings/SlotBinding.cs new file mode 100644 index 0000000..5624cb3 --- /dev/null +++ b/src/Irony.Interpreter/Bindings/SlotBinding.cs @@ -0,0 +1,257 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Interpreter.Scopes; + +namespace Sanchime.Irony.Interpreter.Bindings +{ + // Implements fast access to a variable (local/global var or parameter) in local scope or in any enclosing scope + // Important: the following code is very sensitive to even tiny changes - do not know exactly particular reasons. 
+ public sealed class SlotBinding : Binding + { + public SlotInfo Slot; + public ScopeInfo FromScope; + public int SlotIndex; + public int StaticScopeIndex; + public AstNode FromNode; + + public SlotBinding(SlotInfo slot, AstNode fromNode, ScopeInfo fromScope) : base(slot.Name, BindingTargetType.Slot) + { + Slot = slot; + FromNode = fromNode; + FromScope = fromScope; + SlotIndex = slot.Index; + StaticScopeIndex = Slot.ScopeInfo.StaticIndex; + SetupAccessorMethods(); + } + + private void SetupAccessorMethods() + { + // Check module scope + if (Slot.ScopeInfo.StaticIndex >= 0) + { + GetValueRef = FastGetStaticValue; + SetValueRef = SetStatic; + return; + } + var levelDiff = Slot.ScopeInfo.Level - FromScope.Level; + switch (levelDiff) + { + case 0: // local scope + if (Slot.Type == SlotType.Value) + { + GetValueRef = FastGetCurrentScopeValue; + SetValueRef = SetCurrentScopeValue; + } + else + { + GetValueRef = FastGetCurrentScopeParameter; + SetValueRef = SetCurrentScopeParameter; + } + return; + + case 1: //direct parent + if (Slot.Type == SlotType.Value) + { + GetValueRef = GetImmediateParentScopeValue; + SetValueRef = SetImmediateParentScopeValue; + } + else + { + GetValueRef = GetImmediateParentScopeParameter; + SetValueRef = SetImmediateParentScopeParameter; + } + return; + + default: // some enclosing scope + if (Slot.Type == SlotType.Value) + { + GetValueRef = GetParentScopeValue; + SetValueRef = SetParentScopeValue; + } + else + { + GetValueRef = GetParentScopeParameter; + SetValueRef = SetParentScopeParameter; + } + return; + } + } + + // Specific method implementations ======================================================================================================= + // Optimization: in most cases we go directly for Values array; if we fail, then we fallback to full method + // with proper exception handling. 
This fallback is expected to be extremely rare, so overall we have considerable perf gain + // Note that in we expect the methods to be used directly by identifier node (like: IdentifierNode.EvaluateRef = Binding.GetValueRef; } - + // to save a few processor cycles. Therefore, we need to provide a proper context (thread.CurrentNode) in case of exception. + // In all "full-method" implementations we set current node to FromNode, so exception correctly points + // to the owner Identifier node as a location of error. + + // Current scope + private object FastGetCurrentScopeValue(ScriptThread thread) + { + try + { + //optimization: we go directly for values array; if we fail, then we fallback to regular "proper" method. + return thread.CurrentScope.Values[SlotIndex]; + } + catch + { + return GetCurrentScopeValue(thread); + } + } + + private object GetCurrentScopeValue(ScriptThread thread) + { + try + { + return thread.CurrentScope.GetValue(SlotIndex); + } + catch { thread.CurrentNode = FromNode; throw; } + } + + private object FastGetCurrentScopeParameter(ScriptThread thread) + { + //optimization: we go directly for parameters array; if we fail, then we fallback to regular "proper" method. 
+ try + { + return thread.CurrentScope.Parameters[SlotIndex]; + } + catch + { + return GetCurrentScopeParameter(thread); + } + } + + private object GetCurrentScopeParameter(ScriptThread thread) + { + try + { + return thread.CurrentScope.GetParameter(SlotIndex); + } + catch { thread.CurrentNode = FromNode; throw; } + } + + private void SetCurrentScopeValue(ScriptThread thread, object value) + { + thread.CurrentScope.SetValue(SlotIndex, value); + } + + private void SetCurrentScopeParameter(ScriptThread thread, object value) + { + thread.CurrentScope.SetParameter(SlotIndex, value); + } + + // Static scope (module-level variables) + private object FastGetStaticValue(ScriptThread thread) + { + try + { + return thread.App.StaticScopes[StaticScopeIndex].Values[SlotIndex]; + } + catch + { + return GetStaticValue(thread); + } + } + + private object GetStaticValue(ScriptThread thread) + { + try + { + return thread.App.StaticScopes[StaticScopeIndex].GetValue(SlotIndex); + } + catch { thread.CurrentNode = FromNode; throw; } + } + + private void SetStatic(ScriptThread thread, object value) + { + thread.App.StaticScopes[StaticScopeIndex].SetValue(SlotIndex, value); + } + + // Direct parent + private object GetImmediateParentScopeValue(ScriptThread thread) + { + try + { + return thread.CurrentScope.Parent.Values[SlotIndex]; + } + catch { } + //full method + try + { + return thread.CurrentScope.Parent.GetValue(SlotIndex); + } + catch { thread.CurrentNode = FromNode; throw; } + } + + private object GetImmediateParentScopeParameter(ScriptThread thread) + { + try + { + return thread.CurrentScope.Parent.Parameters[SlotIndex]; + } + catch { } + //full method + try + { + return thread.CurrentScope.Parent.GetParameter(SlotIndex); + } + catch { thread.CurrentNode = FromNode; throw; } + } + + private void SetImmediateParentScopeValue(ScriptThread thread, object value) + { + thread.CurrentScope.Parent.SetValue(SlotIndex, value); + } + + private void 
SetImmediateParentScopeParameter(ScriptThread thread, object value) + { + thread.CurrentScope.Parent.SetParameter(SlotIndex, value); + } + + // Generic case + private object GetParentScopeValue(ScriptThread thread) + { + var targetScope = GetTargetScope(thread); + return targetScope.GetValue(SlotIndex); + } + + private object GetParentScopeParameter(ScriptThread thread) + { + var targetScope = GetTargetScope(thread); + return targetScope.GetParameter(SlotIndex); + } + + private void SetParentScopeValue(ScriptThread thread, object value) + { + var targetScope = GetTargetScope(thread); + targetScope.SetValue(SlotIndex, value); + } + + private void SetParentScopeParameter(ScriptThread thread, object value) + { + var targetScope = GetTargetScope(thread); + targetScope.SetParameter(SlotIndex, value); + } + + private Scope GetTargetScope(ScriptThread thread) + { + var targetLevel = Slot.ScopeInfo.Level; + var scope = thread.CurrentScope.Parent; + while (scope.Info.Level > targetLevel) + scope = scope.Parent; + return scope; + } + }//class SlotReader +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/SpecialFormBinding.cs b/src/Irony.Interpreter/Bindings/SpecialFormBinding.cs new file mode 100644 index 0000000..621bc33 --- /dev/null +++ b/src/Irony.Interpreter/Bindings/SpecialFormBinding.cs @@ -0,0 +1,57 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Bindings +{ + public class SpecialFormBindingInfo : BindingTargetInfo, IBindingSource + { + public readonly ConstantBinding Binding; + public readonly int MinChildCount, MaxChildCount; + public string[] ChildRoles; + + public SpecialFormBindingInfo(string symbol, SpecialForm form, int minChildCount = 0, int maxChildCount = 0, string childRoles = null) + : base(symbol, BindingTargetType.SpecialForm) + { + Binding = new ConstantBinding(form, this); + MinChildCount = minChildCount; + MaxChildCount = Math.Max(minChildCount, maxChildCount); //if maxParamCount=0 then set it equal to minParamCount + if (!string.IsNullOrEmpty(childRoles)) + { + ChildRoles = childRoles.Split(','); + //TODO: add check that paramNames array is in accord with min/max param counts + } + } + + #region IBindingSource Members + + public Binding Bind(BindingRequest request) + { + return Binding; + } + + #endregion + }//class + + public static partial class BindingSourceTableExtensions + { + //Method for adding methods to BuiltIns table in Runtime + public static BindingTargetInfo AddSpecialForm(this BindingSourceTable targets, SpecialForm form, string formName, + int minChildCount = 0, int maxChildCount = 0, string parameterNames = null) + { + var formInfo = new SpecialFormBindingInfo(formName, form, minChildCount, maxChildCount, parameterNames); + targets.Add(formName, formInfo); + return formInfo; + } + } +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Bindings/_about_bindings.txt b/src/Irony.Interpreter/Bindings/_about_bindings.txt new file mode 100644 index 0000000..efdf676 --- /dev/null +++ b/src/Irony.Interpreter/Bindings/_about_bindings.txt @@ -0,0 +1,19 @@ +Some vocabulary, to clarify the terms and class names: + +Binding is an object that serves as a link between a symbol and its value. 
Binding has methods GetValue and SetValue, for getting/setting values in the current context of the app. + For example, symbol X in a script has a corresponding IdentifierNode (AST node). The code in this node, before it can read the value, must get a binding for a symbol "X". On the first execution the node calls a Bind method of the current thread, passing it a BindingRequest object (see below) and expecting back a Binding object that can be used to access the value. + Having a binding object, it can read the value: + var value = binding.GetValue(thread); + +Binding Target Types - classification of bindings by the type of the target. One binding target is a Slot - a local or global variable. Other examples: built-in method; CLR method or object imported through interop. + + +BindingTargetInfo is metadata for a binding; contains Symbol and BindingType. Each binding has a TargetInfo property that describes it. + +BindingRequest is a container for information about a desired binding when the code in an AST node tries to get the binding from the executing script environment. It contains Symbol (name of the variable or function), and some other flags. + +IBindingSource is an abstraction of a binding source - something that can produce a binding for a symbol. Simply speaking, a binding source is asked "Do you have something named 'foo'?" and it answers 'yes, here is a binding to foo thing'. Examples of binding sources: a table of built-in methods; a local frame with a set of variables; an import specification in a module pointing to an external module. + +BindingSourceTable is a table of binding sources indexed by name. The LanguageRuntime.BuiltIns field is such a table - it contains built-in methods and objects, stored as binding sources. + +Scope - a set of variables in a programming scope. For example: local scope, module scope, object scope. A scope consists of slots - locations where values are stored. Each slot has a SlotInfo metadata object.
Each Scope has a ScopeInfo object (metadata) that contains a list of SlotInfo objects describing slots in the scope. diff --git a/src/Irony.Interpreter/Diagnostics/ScriptException.cs b/src/Irony.Interpreter/Diagnostics/ScriptException.cs new file mode 100644 index 0000000..2fdd772 --- /dev/null +++ b/src/Irony.Interpreter/Diagnostics/ScriptException.cs @@ -0,0 +1,42 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Diagnostics +{ + public class ScriptException : Exception + { + public SourceLocation Location; + public ScriptStackTrace ScriptStackTrace; + + public ScriptException(string message) : base(message) + { + } + + public ScriptException(string message, Exception inner) : base(message, inner) + { + } + + public ScriptException(string message, Exception inner, SourceLocation location, ScriptStackTrace stack) + : base(message, inner) + { + Location = location; + ScriptStackTrace = stack; + } + + public override string ToString() + { + return Message + Environment.NewLine + ScriptStackTrace?.ToString(); //ScriptStackTrace is null unless the 4-argument constructor set it; null concatenates as empty + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Diagnostics/ScriptStackTrace.cs b/src/Irony.Interpreter/Diagnostics/ScriptStackTrace.cs new file mode 100644 index 0000000..d5933da --- /dev/null +++ b/src/Irony.Interpreter/Diagnostics/ScriptStackTrace.cs @@ -0,0 +1,20 @@ +#region License + +/*
********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Diagnostics +{ + public class ScriptStackTrace + { + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/GlobalUsings.cs b/src/Irony.Interpreter/GlobalUsings.cs new file mode 100644 index 0000000..e34ef7e --- /dev/null +++ b/src/Irony.Interpreter/GlobalUsings.cs @@ -0,0 +1,11 @@ +global using Sanchime.Irony.Interpreter.Ast; +global using Sanchime.Irony.Interpreter.Bindings; +global using Sanchime.Irony.Interpreter.Diagnostics; +global using Sanchime.Irony.Interpreter.SriptApplication; +global using Sanchime.Irony.Parsing.Data; +global using Sanchime.Irony.Parsing.Grammars; +global using Sanchime.Irony.Parsing.Parsers; +global using Sanchime.Irony.Parsing.Scanners; +global using Sanchime.Irony.Parsing.Terminals; +global using Sanchime.Irony.Utilities; +global using System.Text; \ No newline at end of file diff --git a/src/Irony.Interpreter/InterpretedLanguageGrammar.cs b/src/Irony.Interpreter/InterpretedLanguageGrammar.cs new file mode 100644 index 0000000..75906da --- /dev/null +++ b/src/Irony.Interpreter/InterpretedLanguageGrammar.cs @@ -0,0 +1,66 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. 
A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using Sanchime.Irony.Interpreter.Utilities; + +namespace Sanchime.Irony.Interpreter +{ + /// Base class for languages that use Irony Interpreter to execute scripts. + public abstract class InterpretedLanguageGrammar : Grammar, ICanRunSample + { + // making the class abstract so it won't load into Grammar Explorer + public InterpretedLanguageGrammar(bool caseSensitive) + : base(caseSensitive) + { + LanguageFlags = LanguageFlags.CreateAst; + } + + // This method allows custom implementation of running a sample in Grammar Explorer + // By default it evaluates a parse tree using default interpreter. + // Irony's interpeter has one restriction: once a script (represented by AST node) is evaluated in ScriptApp, + // its internal fields in AST nodes become tied to this particular instance of ScriptApp (more precisely DataMap). + // If you want to evaluate the AST tree again, you have to do it in the context of the same DataMap. + // Grammar Explorer may call RunSample method repeatedly for evaluation of the same parsed script. So we keep ScriptApp instance in + // the field, and if we get the same script node, then we reuse the ScriptApp thus satisfying the requirement. 
+ private ScriptApp _app; + + private ParseTree _prevSample; + + public virtual string RunSample(RunSampleArgs args) + { + if (_app == null || args.ParsedSample != _prevSample) + _app = new ScriptApp(args.Language); + _prevSample = args.ParsedSample; + + //for (int i = 0; i < 1000; i++) //for perf measurements, to execute 1000 times + _app.Evaluate(args.ParsedSample); + return _app.OutputBuffer.ToString(); + } + + public virtual LanguageRuntime CreateRuntime(LanguageData language) + { + return new LanguageRuntime(language); + } + + public override void BuildAst(LanguageData language, ParseTree parseTree) + { + var opHandler = new OperatorHandler(language.Grammar.CaseSensitive); + Util.Check(!parseTree.HasErrors(), "ParseTree has errors, cannot build AST."); + var astContext = new InterpreterAstContext(language, opHandler); + var astBuilder = new AstBuilder(astContext); + astBuilder.BuildAst(parseTree); + } + } //grammar class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime.cs b/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime.cs new file mode 100644 index 0000000..27a9f44 --- /dev/null +++ b/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime.cs @@ -0,0 +1,82 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter +{ + public class ConsoleWriteEventArgs : EventArgs + { + public string Text; + + public ConsoleWriteEventArgs(string text) + { + Text = text; + } + } + + //Note: mark the derived language-specific class as sealed - important for JIT optimizations + // details here: http://www.codeproject.com/KB/dotnet/JITOptimizations.aspx + public partial class LanguageRuntime + { + public readonly LanguageData Language; + public OperatorHandler OperatorHandler; + + //Converter of the result for comparison operation; converts bool value to values + // specific for the language + public UnaryOperatorMethod BoolResultConverter = null; + + //An unassigned reserved object for a language implementation + public NoneClass NoneValue { get; protected set; } + + //Built-in binding sources + public BindingSourceTable BuiltIns; + + public LanguageRuntime(LanguageData language) + { + Language = language; + NoneValue = NoneClass.Value; + BuiltIns = new BindingSourceTable(Language.Grammar.CaseSensitive); + Init(); + } + + public virtual void Init() + { + InitOperatorImplementations(); + } + + public virtual bool IsTrue(object value) + { + if (value is bool) + return (bool)value; + if (value is int) + return ((int)value != 0); + if (value == NoneValue) + return false; + return value != null; + } + + protected internal void ThrowError(string message, params object[] args) + { + if (args != null && args.Length > 0) + message = string.Format(message, args); + throw new Exception(message); + } + + protected internal void ThrowScriptError(string message, params object[] args) + { + if (args != null && args.Length > 0) + message = string.Format(message, args); + throw new ScriptException(message); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_Binding.cs 
b/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_Binding.cs new file mode 100644 index 0000000..6cbf289 --- /dev/null +++ b/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_Binding.cs @@ -0,0 +1,116 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Interpreter.Scopes; +using Sanchime.Irony.Interpreter.Utilities; + +namespace Sanchime.Irony.Interpreter +{ + public partial class LanguageRuntime : IBindingSource + { + //Binds to local variables, enclosing scopes, module scopes/globals and built-ins + public virtual Binding Bind(BindingRequest request) + { + var symbol = request.Symbol; + var mode = request.Flags; + if (mode.IsSet(BindingRequestFlags.Write)) + return BindSymbolForWrite(request); + else if (mode.IsSet(BindingRequestFlags.Read)) + return BindSymbolForRead(request); + else + { + //TODO: need to throw fatal error here + request.Thread.ThrowScriptError("Invalid binding request, access type (Read or Write) is not set in request Options."); + return null; // never happens + } + }//method + + public virtual Binding BindSymbolForWrite(BindingRequest request) + { + var scope = request.Thread.CurrentScope; + var existingSlot = scope.Info.GetSlot(request.Symbol); + //1. 
If new only, check it does not exist yet, create and return it + if (request.Flags.IsSet(BindingRequestFlags.NewOnly)) + { + if (existingSlot != null) + request.Thread.ThrowScriptError("Variable {0} already exists.", request.Symbol); + var newSlot = scope.AddSlot(request.Symbol); + return new SlotBinding(newSlot, request.FromNode, request.FromScopeInfo); + } + //2. If exists, then return it + if (existingSlot != null && request.Flags.IsSet(BindingRequestFlags.ExistingOrNew)) + { + //TODO: For external client, check that slot is actually public or exported + return new SlotBinding(existingSlot, request.FromNode, request.FromScopeInfo); + } + + //3. Check external module imports + foreach (var imp in request.FromModule.Imports) + { + var result = imp.Bind(request); + if (result != null) + return result; + } + + //4. If nothing found, create new slot in current scope + if (request.Flags.IsSet(BindingRequestFlags.ExistingOrNew)) + { + var newSlot = scope.AddSlot(request.Symbol); + return new SlotBinding(newSlot, request.FromNode, request.FromScopeInfo); + } + + //5. Check built-in methods + var builtIn = BuiltIns.Bind(request); + if (builtIn != null) return builtIn; + + //6. If still not found, return null. 
+ return null; + }//method + + public virtual Binding BindSymbolForRead(BindingRequest request) + { + var symbol = request.Symbol; + // First check current and enclosing scopes + var currScope = request.Thread.CurrentScope; + do + { + var existingSlot = currScope.Info.GetSlot(symbol); + if (existingSlot != null) + return new SlotBinding(existingSlot, request.FromNode, request.FromScopeInfo); + currScope = currScope.Parent; + } while (currScope != null); + + // If not found, check imports + foreach (var imp in request.FromModule.Imports) + { + var result = imp.Bind(request); + if (result != null) + return result; + } + + // Check built-in modules + var builtIn = BuiltIns.Bind(request); + if (builtIn != null) return builtIn; + + // if not found, return null + return null; + } + + //Binds symbol to a public member exported by a module. + public virtual Binding BindSymbol(BindingRequest request, ModuleInfo module) + { + return module.BindToExport(request); + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_OpDispatch.cs b/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_OpDispatch.cs new file mode 100644 index 0000000..4abe59e --- /dev/null +++ b/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_OpDispatch.cs @@ -0,0 +1,132 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using System.Linq.Expressions; + +namespace Sanchime.Irony.Interpreter +{ + public partial class LanguageRuntime + { + public readonly OperatorImplementationTable OperatorImplementations = new OperatorImplementationTable(2000); + + public object ExecuteBinaryOperator(ExpressionType op, object arg1, object arg2, ref OperatorImplementation previousUsed) + { + // 1. Get arg types + Type arg1Type, arg2Type; + try + { + arg1Type = arg1.GetType(); + arg2Type = arg2.GetType(); + } + catch (NullReferenceException) + { + // arg1 or arg2 is null - which means never assigned. + CheckUnassigned(arg1); + CheckUnassigned(arg2); + throw; + } + + // 2. If we had prev impl, check if current args types match it; first copy it into local variable + // Note: BinaryExpression node might already have tried it directly, without any checks, and + // apparently failed. At some point this attempt in BinaryExpressionNode can become disabled. + // But we might still try it here, with proper checks + var currentImpl = previousUsed; + if (currentImpl != null && (arg1Type != currentImpl.Key.Arg1Type || arg2Type != currentImpl.Key.Arg2Type)) + currentImpl = null; + + // 3. Find implementation for arg types + OperatorDispatchKey key; + if (currentImpl == null) + { + key = new OperatorDispatchKey(op, arg1Type, arg2Type); + if (!OperatorImplementations.TryGetValue(key, out currentImpl)) + ThrowScriptError(Resources.ErrOpNotDefinedForTypes, op, arg1Type, arg2Type); + } + + // 4. 
Actually call + try + { + previousUsed = currentImpl; + return currentImpl.EvaluateBinary(arg1, arg2); + } + catch (OverflowException) + { + if (currentImpl.OverflowHandler == null) throw; + previousUsed = currentImpl.OverflowHandler; //set previousUsed to overflowHandler, so it will be used next time + return ExecuteBinaryOperator(op, arg1, arg2, ref previousUsed); //call self recursively + } + catch (IndexOutOfRangeException) + { + //We can get here only if we use SmartBoxing - the result is out of range of pre-allocated boxes, + // so attempt to lookup a boxed value in _boxes dictionary fails with outOfRange exc + if (currentImpl.NoBoxImplementation == null) throw; + // If NoBoxImpl is not null, then it is implementation with auto-boxing. + // Auto-boxing failed - the result is outside the range of our boxes array. Let's call no-box version. + // we also set previousUsed to no-box implementation, so we use it in the future calls + previousUsed = currentImpl.NoBoxImplementation; + return ExecuteBinaryOperator(op, arg1, arg2, ref previousUsed); //call self recursively + } + }//method + + public object ExecuteUnaryOperator(ExpressionType op, object arg1, ref OperatorImplementation previousUsed) + { + // 1. Get arg type + Type arg1Type; + try + { + arg1Type = arg1.GetType(); + } + catch (NullReferenceException) + { + CheckUnassigned(arg1); + throw; + } + + // 2. If we had prev impl, check if current args types match it; first copy it into local variable + OperatorDispatchKey key; + var currentImpl = previousUsed; + if (currentImpl != null && arg1Type != currentImpl.Key.Arg1Type) + currentImpl = null; + + // 3. Find implementation for arg type + if (currentImpl == null) + { + key = new OperatorDispatchKey(op, arg1Type); + if (!OperatorImplementations.TryGetValue(key, out currentImpl)) + ThrowScriptError(Resources.ErrOpNotDefinedForType, op, arg1Type); //ScriptException, same error type as the binary-operator lookup + } + + // 4.
Actually call + try + { + previousUsed = currentImpl; //set previousUsed so next time we'll try this impl first + return currentImpl.Arg1Converter(arg1); + } + catch (OverflowException) + { + if (currentImpl.OverflowHandler == null) + throw; + previousUsed = currentImpl.OverflowHandler; //set previousUsed to overflowHandler, so it will be used next time + return ExecuteUnaryOperator(op, arg1, ref previousUsed); //call self recursively + } + }//method + + //TODO: finish this + private void CheckUnassigned(object value) + { + if (value == null) + throw new Exception("Variable unassigned."); + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_OpDispatch_Init.cs b/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_OpDispatch_Init.cs new file mode 100644 index 0000000..05a8363 --- /dev/null +++ b/src/Irony.Interpreter/LanguageRuntime/LanguageRuntime_OpDispatch_Init.cs @@ -0,0 +1,691 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using System.Linq.Expressions; +using System.Numerics; + +namespace Sanchime.Irony.Interpreter +{ + //Initialization of Runtime + public partial class LanguageRuntime + { + private static ExpressionType[] _overflowOperators = new ExpressionType[] { + ExpressionType.Add, ExpressionType.AddChecked, ExpressionType.Subtract, ExpressionType.SubtractChecked, + ExpressionType.Multiply, ExpressionType.MultiplyChecked, ExpressionType.Power}; + + // Smart boxing: boxes for a bunch of integers are preallocated + private object[] _boxes = new object[4096]; + + private const int _boxesMiddle = 2048; + + // Note: ran some primitive tests, and it appears that use of smart boxing makes it slower + // by about 5-10%; so disabling it for now + public bool SmartBoxingEnabled = false; + + private bool _supportsComplex; + private bool _supportsBigInt; + private bool _supportsRational; + + protected virtual void InitOperatorImplementations() + { + _supportsComplex = this.Language.Grammar.LanguageFlags.IsSet(LanguageFlags.SupportsComplex); + _supportsBigInt = this.Language.Grammar.LanguageFlags.IsSet(LanguageFlags.SupportsBigInt); + _supportsRational = this.Language.Grammar.LanguageFlags.IsSet(LanguageFlags.SupportsRational); + // TODO: add support for Rational + if (SmartBoxingEnabled) + InitBoxes(); + InitTypeConverters(); + InitBinaryOperatorImplementationsForMatchedTypes(); + InitUnaryOperatorImplementations(); + CreateBinaryOperatorImplementationsForMismatchedTypes(); + CreateOverflowHandlers(); + } + + //The value of smart boxing is questionable - so far did not see perf improvements, so currently it is disabled + private void InitBoxes() + { + for (int i = 0; i < _boxes.Length; i++) + _boxes[i] = i - _boxesMiddle; + } + + #region Utility methods for adding converters and binary implementations + + protected OperatorImplementation AddConverter(Type fromType, Type toType, 
UnaryOperatorMethod method) + { + var key = new OperatorDispatchKey(ExpressionType.ConvertChecked, fromType, toType); + var impl = new OperatorImplementation(key, toType, method); + OperatorImplementations[key] = impl; + return impl; + } + + protected OperatorImplementation AddBinaryBoxed(ExpressionType op, Type baseType, + BinaryOperatorMethod boxedBinaryMethod, BinaryOperatorMethod noBoxMethod) + { + // first create implementation without boxing + var noBoxImpl = AddBinary(op, baseType, noBoxMethod); + if (!SmartBoxingEnabled) + return noBoxImpl; + //The boxedImpl will overwrite noBoxImpl in the dictionary + var boxedImpl = AddBinary(op, baseType, boxedBinaryMethod); + boxedImpl.NoBoxImplementation = noBoxImpl; + return boxedImpl; + } + + protected OperatorImplementation AddBinary(ExpressionType op, Type baseType, BinaryOperatorMethod binaryMethod) + { + return AddBinary(op, baseType, binaryMethod, null); + } + + protected OperatorImplementation AddBinary(ExpressionType op, Type commonType, + BinaryOperatorMethod binaryMethod, UnaryOperatorMethod resultConverter) + { + var key = new OperatorDispatchKey(op, commonType, commonType); + var impl = new OperatorImplementation(key, commonType, binaryMethod, null, null, resultConverter); + OperatorImplementations[key] = impl; + return impl; + } + + protected OperatorImplementation AddUnary(ExpressionType op, Type commonType, UnaryOperatorMethod unaryMethod) + { + var key = new OperatorDispatchKey(op, commonType); + var impl = new OperatorImplementation(key, commonType, null, unaryMethod, null, null); + OperatorImplementations[key] = impl; + return impl; + } + + #endregion + + #region Initializing type converters + + public virtual void InitTypeConverters() + { + Type targetType; + + //->string + targetType = typeof(string); + AddConverter(typeof(char), targetType, ConvertAnyToString); + AddConverter(typeof(sbyte), targetType, ConvertAnyToString); + AddConverter(typeof(byte), targetType, ConvertAnyToString); + 
AddConverter(typeof(Int16), targetType, ConvertAnyToString); + AddConverter(typeof(UInt16), targetType, ConvertAnyToString); + AddConverter(typeof(Int32), targetType, ConvertAnyToString); + AddConverter(typeof(UInt32), targetType, ConvertAnyToString); + AddConverter(typeof(Int64), targetType, ConvertAnyToString); + AddConverter(typeof(UInt64), targetType, ConvertAnyToString); + AddConverter(typeof(Single), targetType, ConvertAnyToString); + if (_supportsBigInt) + AddConverter(typeof(BigInteger), targetType, ConvertAnyToString); + if (_supportsComplex) + AddConverter(typeof(Complex), targetType, ConvertAnyToString); + + //->Complex + if (_supportsComplex) + { + targetType = typeof(Complex); + AddConverter(typeof(sbyte), targetType, ConvertAnyToComplex); + AddConverter(typeof(byte), targetType, ConvertAnyToComplex); + AddConverter(typeof(Int16), targetType, ConvertAnyToComplex); + AddConverter(typeof(UInt16), targetType, ConvertAnyToComplex); + AddConverter(typeof(Int32), targetType, ConvertAnyToComplex); + AddConverter(typeof(UInt32), targetType, ConvertAnyToComplex); + AddConverter(typeof(Int64), targetType, ConvertAnyToComplex); + AddConverter(typeof(UInt64), targetType, ConvertAnyToComplex); + AddConverter(typeof(Single), targetType, ConvertAnyToComplex); + if (_supportsBigInt) + AddConverter(typeof(BigInteger), targetType, ConvertBigIntToComplex); + } + //->BigInteger + if (_supportsBigInt) + { + targetType = typeof(BigInteger); + AddConverter(typeof(sbyte), targetType, ConvertAnyIntToBigInteger); + AddConverter(typeof(byte), targetType, ConvertAnyIntToBigInteger); + AddConverter(typeof(Int16), targetType, ConvertAnyIntToBigInteger); + AddConverter(typeof(UInt16), targetType, ConvertAnyIntToBigInteger); + AddConverter(typeof(Int32), targetType, ConvertAnyIntToBigInteger); + AddConverter(typeof(UInt32), targetType, ConvertAnyIntToBigInteger); + AddConverter(typeof(Int64), targetType, ConvertAnyIntToBigInteger); + AddConverter(typeof(UInt64), targetType, 
ConvertAnyIntToBigInteger); + } + + //->Double + targetType = typeof(double); + AddConverter(typeof(sbyte), targetType, value => (double)(sbyte)value); + AddConverter(typeof(byte), targetType, value => (double)(byte)value); + AddConverter(typeof(Int16), targetType, value => (double)(Int16)value); + AddConverter(typeof(UInt16), targetType, value => (double)(UInt16)value); + AddConverter(typeof(Int32), targetType, value => (double)(Int32)value); + AddConverter(typeof(UInt32), targetType, value => (double)(UInt32)value); + AddConverter(typeof(Int64), targetType, value => (double)(Int64)value); + AddConverter(typeof(UInt64), targetType, value => (double)(UInt64)value); + AddConverter(typeof(Single), targetType, value => (double)(Single)value); + if (_supportsBigInt) + AddConverter(typeof(BigInteger), targetType, value => ((double)(BigInteger)value)); + + //->Single + targetType = typeof(Single); + AddConverter(typeof(sbyte), targetType, value => (Single)(sbyte)value); + AddConverter(typeof(byte), targetType, value => (Single)(byte)value); + AddConverter(typeof(Int16), targetType, value => (Single)(Int16)value); + AddConverter(typeof(UInt16), targetType, value => (Single)(UInt16)value); + AddConverter(typeof(Int32), targetType, value => (Single)(Int32)value); + AddConverter(typeof(UInt32), targetType, value => (Single)(UInt32)value); + AddConverter(typeof(Int64), targetType, value => (Single)(Int64)value); + AddConverter(typeof(UInt64), targetType, value => (Single)(UInt64)value); + if (_supportsBigInt) + AddConverter(typeof(BigInteger), targetType, value => (Single)(BigInteger)value); + + //->UInt64 + targetType = typeof(UInt64); + AddConverter(typeof(sbyte), targetType, value => (UInt64)(sbyte)value); + AddConverter(typeof(byte), targetType, value => (UInt64)(byte)value); + AddConverter(typeof(Int16), targetType, value => (UInt64)(Int16)value); + AddConverter(typeof(UInt16), targetType, value => (UInt64)(UInt16)value); + AddConverter(typeof(Int32), targetType, value 
=> (UInt64)(Int32)value); + AddConverter(typeof(UInt32), targetType, value => (UInt64)(UInt32)value); + AddConverter(typeof(Int64), targetType, value => (UInt64)(Int64)value); + + //->Int64 + targetType = typeof(Int64); + AddConverter(typeof(sbyte), targetType, value => (Int64)(sbyte)value); + AddConverter(typeof(byte), targetType, value => (Int64)(byte)value); + AddConverter(typeof(Int16), targetType, value => (Int64)(Int16)value); + AddConverter(typeof(UInt16), targetType, value => (Int64)(UInt16)value); + AddConverter(typeof(Int32), targetType, value => (Int64)(Int32)value); + AddConverter(typeof(UInt32), targetType, value => (Int64)(UInt32)value); + + //->UInt32 + targetType = typeof(UInt32); + AddConverter(typeof(sbyte), targetType, value => (UInt32)(sbyte)value); + AddConverter(typeof(byte), targetType, value => (UInt32)(byte)value); + AddConverter(typeof(Int16), targetType, value => (UInt32)(Int16)value); + AddConverter(typeof(UInt16), targetType, value => (UInt32)(UInt16)value); + AddConverter(typeof(Int32), targetType, value => (UInt32)(Int32)value); + + //->Int32 + targetType = typeof(Int32); + AddConverter(typeof(sbyte), targetType, value => (Int32)(sbyte)value); + AddConverter(typeof(byte), targetType, value => (Int32)(byte)value); + AddConverter(typeof(Int16), targetType, value => (Int32)(Int16)value); + AddConverter(typeof(UInt16), targetType, value => (Int32)(UInt16)value); + + //->UInt16 + targetType = typeof(UInt16); + AddConverter(typeof(sbyte), targetType, value => (UInt16)(sbyte)value); + AddConverter(typeof(byte), targetType, value => (UInt16)(byte)value); + AddConverter(typeof(Int16), targetType, value => (UInt16)(Int16)value); + + //->Int16 + targetType = typeof(Int16); + AddConverter(typeof(sbyte), targetType, value => (Int16)(sbyte)value); + AddConverter(typeof(byte), targetType, value => (Int16)(byte)value); + + //->byte + targetType = typeof(byte); + AddConverter(typeof(sbyte), targetType, value => (byte)(sbyte)value); + } + + // Some 
// specialized convert implementation methods

// Converts any value to its string form via ToString(); a null input yields an empty string.
public static object ConvertAnyToString(object value)
{
    return value == null ? string.Empty : value.ToString();
}

// Converts a boxed BigInteger to a Complex whose imaginary part is zero.
// Throws InvalidCastException if value is not a boxed BigInteger.
public static object ConvertBigIntToComplex(object value)
{
    BigInteger bi = (BigInteger)value;
    return new Complex((double)bi, 0);
}

// Converts any value accepted by Convert.ToDouble to a Complex whose imaginary part is zero.
public static object ConvertAnyToComplex(object value)
{
    double d = Convert.ToDouble(value);
    return new Complex(d, 0);
}

// Converts a boxed integer value to BigInteger.
// UInt64 is handled separately: it is one of the source types this converter is registered for,
// and routing it through Convert.ToInt64 throws OverflowException for values above Int64.MaxValue.
// Every other input still goes through Convert.ToInt64, exactly as before.
public static object ConvertAnyIntToBigInteger(object value)
{
    if (value is ulong unsignedValue)
        return new BigInteger(unsignedValue);
    long l = Convert.ToInt64(value);
    return new BigInteger(l);
}

+ #endregion + + #region Binary operators implementations + + // Generates of binary implementations for matched argument types + public virtual void InitBinaryOperatorImplementationsForMatchedTypes() + { + // For each operator, we add a series of implementation methods for same-type operands. They are saved as OperatorImplementation + // records in OperatorImplementations table. This happens at initialization time. + // After this initialization (for same-type operands), system adds implementations for all type pairs (ex: int + double), + // using these same-type implementations and appropriate type converters. + // Note that arithmetics on byte, sbyte, int16, uint16 are performed in Int32 format (the way it's done in c# I guess) + // so the result is always Int32. We do not define operators for sbyte, byte, int16 and UInt16 types - they will + // be processed using Int32 implementation, with appropriate type converters.
+ ExpressionType op; + + op = ExpressionType.AddChecked; + AddBinaryBoxed(op, typeof(Int32), (x, y) => _boxes[checked((Int32)x + (Int32)y) + _boxesMiddle], + (x, y) => checked((Int32)x + (Int32)y)); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x + (UInt32)y)); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x + (Int64)y)); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x + (UInt64)y)); + AddBinary(op, typeof(Single), (x, y) => (Single)x + (Single)y); + AddBinary(op, typeof(double), (x, y) => (double)x + (double)y); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x + (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x + (BigInteger)y); + if (_supportsComplex) + AddBinary(op, typeof(Complex), (x, y) => (Complex)x + (Complex)y); + AddBinary(op, typeof(string), (x, y) => (string)x + (string)y); + AddBinary(op, typeof(char), (x, y) => ((char)x).ToString() + (char)y); //force to concatenate as strings + + op = ExpressionType.SubtractChecked; + AddBinaryBoxed(op, typeof(Int32), (x, y) => _boxes[checked((Int32)x - (Int32)y) + _boxesMiddle], + (x, y) => checked((Int32)x - (Int32)y)); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x - (UInt32)y)); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x - (Int64)y)); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x - (UInt64)y)); + AddBinary(op, typeof(Single), (x, y) => (Single)x - (Single)y); + AddBinary(op, typeof(double), (x, y) => (double)x - (double)y); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x - (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x - (BigInteger)y); + if (_supportsComplex) + AddBinary(op, typeof(Complex), (x, y) => (Complex)x - (Complex)y); + + op = ExpressionType.MultiplyChecked; + AddBinaryBoxed(op, typeof(Int32), (x, y) => _boxes[checked((Int32)x * (Int32)y) + _boxesMiddle], + (x, y) => checked((Int32)x * (Int32)y)); + AddBinary(op, 
typeof(UInt32), (x, y) => checked((UInt32)x * (UInt32)y)); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x * (Int64)y)); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x * (UInt64)y)); + AddBinary(op, typeof(Single), (x, y) => (Single)x * (Single)y); + AddBinary(op, typeof(double), (x, y) => (double)x * (double)y); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x * (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x * (BigInteger)y); + if (_supportsComplex) + AddBinary(op, typeof(Complex), (x, y) => (Complex)x * (Complex)y); + + op = ExpressionType.Divide; + AddBinary(op, typeof(Int32), (x, y) => checked((Int32)x / (Int32)y)); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x / (UInt32)y)); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x / (Int64)y)); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x / (UInt64)y)); + AddBinary(op, typeof(Single), (x, y) => (Single)x / (Single)y); + AddBinary(op, typeof(double), (x, y) => (double)x / (double)y); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x / (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x / (BigInteger)y); + if (_supportsComplex) + AddBinary(op, typeof(Complex), (x, y) => (Complex)x / (Complex)y); + + op = ExpressionType.Modulo; + AddBinary(op, typeof(Int32), (x, y) => checked((Int32)x % (Int32)y)); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x % (UInt32)y)); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x % (Int64)y)); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x % (UInt64)y)); + AddBinary(op, typeof(Single), (x, y) => (Single)x % (Single)y); + AddBinary(op, typeof(double), (x, y) => (double)x % (double)y); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x % (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x % (BigInteger)y); + + // For bitwise operator, we provide explicit 
implementations for "small" integer types + op = ExpressionType.And; + AddBinary(op, typeof(bool), (x, y) => (bool)x & (bool)y); + AddBinary(op, typeof(sbyte), (x, y) => (sbyte)x & (sbyte)y); + AddBinary(op, typeof(byte), (x, y) => (byte)x & (byte)y); + AddBinary(op, typeof(Int16), (x, y) => (Int16)x & (Int16)y); + AddBinary(op, typeof(UInt16), (x, y) => (UInt16)x & (UInt16)y); + AddBinary(op, typeof(Int32), (x, y) => (Int32)x & (Int32)y); + AddBinary(op, typeof(UInt32), (x, y) => (UInt32)x & (UInt32)y); + AddBinary(op, typeof(Int64), (x, y) => (Int64)x & (Int64)y); + AddBinary(op, typeof(UInt64), (x, y) => (UInt64)x & (UInt64)y); + + op = ExpressionType.Or; + AddBinary(op, typeof(bool), (x, y) => (bool)x | (bool)y); + AddBinary(op, typeof(sbyte), (x, y) => (sbyte)x | (sbyte)y); + AddBinary(op, typeof(byte), (x, y) => (byte)x | (byte)y); + AddBinary(op, typeof(Int16), (x, y) => (Int16)x | (Int16)y); + AddBinary(op, typeof(UInt16), (x, y) => (UInt16)x | (UInt16)y); + AddBinary(op, typeof(Int32), (x, y) => (Int32)x | (Int32)y); + AddBinary(op, typeof(UInt32), (x, y) => (UInt32)x | (UInt32)y); + AddBinary(op, typeof(Int64), (x, y) => (Int64)x | (Int64)y); + AddBinary(op, typeof(UInt64), (x, y) => (UInt64)x | (UInt64)y); + + op = ExpressionType.ExclusiveOr; + AddBinary(op, typeof(bool), (x, y) => (bool)x ^ (bool)y); + AddBinary(op, typeof(sbyte), (x, y) => (sbyte)x ^ (sbyte)y); + AddBinary(op, typeof(byte), (x, y) => (byte)x ^ (byte)y); + AddBinary(op, typeof(Int16), (x, y) => (Int16)x ^ (Int16)y); + AddBinary(op, typeof(UInt16), (x, y) => (UInt16)x ^ (UInt16)y); + AddBinary(op, typeof(Int32), (x, y) => (Int32)x ^ (Int32)y); + AddBinary(op, typeof(UInt32), (x, y) => (UInt32)x ^ (UInt32)y); + AddBinary(op, typeof(Int64), (x, y) => (Int64)x ^ (Int64)y); + AddBinary(op, typeof(UInt64), (x, y) => (UInt64)x ^ (UInt64)y); + + op = ExpressionType.LessThan; + AddBinary(op, typeof(Int32), (x, y) => checked((Int32)x < (Int32)y), BoolResultConverter); + AddBinary(op, 
typeof(UInt32), (x, y) => checked((UInt32)x < (UInt32)y), BoolResultConverter); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x < (Int64)y), BoolResultConverter); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x < (UInt64)y), BoolResultConverter); + AddBinary(op, typeof(Single), (x, y) => (Single)x < (Single)y, BoolResultConverter); + AddBinary(op, typeof(double), (x, y) => (double)x < (double)y, BoolResultConverter); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x < (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x < (BigInteger)y, BoolResultConverter); + + op = ExpressionType.GreaterThan; + AddBinary(op, typeof(Int32), (x, y) => checked((Int32)x > (Int32)y), BoolResultConverter); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x > (UInt32)y), BoolResultConverter); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x > (Int64)y), BoolResultConverter); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x > (UInt64)y), BoolResultConverter); + AddBinary(op, typeof(Single), (x, y) => (Single)x > (Single)y, BoolResultConverter); + AddBinary(op, typeof(double), (x, y) => (double)x > (double)y, BoolResultConverter); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x > (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x > (BigInteger)y, BoolResultConverter); + + op = ExpressionType.LessThanOrEqual; + AddBinary(op, typeof(Int32), (x, y) => checked((Int32)x <= (Int32)y), BoolResultConverter); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x <= (UInt32)y), BoolResultConverter); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x <= (Int64)y), BoolResultConverter); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x <= (UInt64)y), BoolResultConverter); + AddBinary(op, typeof(Single), (x, y) => (Single)x <= (Single)y, BoolResultConverter); + AddBinary(op, typeof(double), (x, y) => (double)x <= (double)y, 
BoolResultConverter); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x <= (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x <= (BigInteger)y, BoolResultConverter); + + op = ExpressionType.GreaterThanOrEqual; + AddBinary(op, typeof(Int32), (x, y) => checked((Int32)x >= (Int32)y), BoolResultConverter); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x >= (UInt32)y), BoolResultConverter); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x >= (Int64)y), BoolResultConverter); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x >= (UInt64)y), BoolResultConverter); + AddBinary(op, typeof(Single), (x, y) => (Single)x >= (Single)y, BoolResultConverter); + AddBinary(op, typeof(double), (x, y) => (double)x >= (double)y, BoolResultConverter); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x >= (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x >= (BigInteger)y, BoolResultConverter); + + op = ExpressionType.Equal; + AddBinary(op, typeof(Int32), (x, y) => checked((Int32)x == (Int32)y), BoolResultConverter); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x == (UInt32)y), BoolResultConverter); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x == (Int64)y), BoolResultConverter); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x == (UInt64)y), BoolResultConverter); + AddBinary(op, typeof(Single), (x, y) => (Single)x == (Single)y, BoolResultConverter); + AddBinary(op, typeof(double), (x, y) => (double)x == (double)y, BoolResultConverter); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x == (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x == (BigInteger)y, BoolResultConverter); + + op = ExpressionType.NotEqual; + AddBinary(op, typeof(Int32), (x, y) => checked((Int32)x != (Int32)y), BoolResultConverter); + AddBinary(op, typeof(UInt32), (x, y) => checked((UInt32)x != (UInt32)y), 
BoolResultConverter); + AddBinary(op, typeof(Int64), (x, y) => checked((Int64)x != (Int64)y), BoolResultConverter); + AddBinary(op, typeof(UInt64), (x, y) => checked((UInt64)x != (UInt64)y), BoolResultConverter); + AddBinary(op, typeof(Single), (x, y) => (Single)x != (Single)y, BoolResultConverter); + AddBinary(op, typeof(double), (x, y) => (double)x != (double)y, BoolResultConverter); + AddBinary(op, typeof(decimal), (x, y) => (decimal)x != (decimal)y); + if (_supportsBigInt) + AddBinary(op, typeof(BigInteger), (x, y) => (BigInteger)x != (BigInteger)y, BoolResultConverter); + }//method + + public virtual void InitUnaryOperatorImplementations() + { + var op = ExpressionType.UnaryPlus; + AddUnary(op, typeof(sbyte), x => +(sbyte)x); + AddUnary(op, typeof(byte), x => +(byte)x); + AddUnary(op, typeof(Int16), x => +(Int16)x); + AddUnary(op, typeof(UInt16), x => +(UInt16)x); + AddUnary(op, typeof(Int32), x => +(Int32)x); + AddUnary(op, typeof(UInt32), x => +(UInt32)x); + AddUnary(op, typeof(Int64), x => +(Int64)x); + AddUnary(op, typeof(UInt64), x => +(UInt64)x); + AddUnary(op, typeof(Single), x => +(Single)x); + AddUnary(op, typeof(double), x => +(double)x); + AddUnary(op, typeof(decimal), x => +(decimal)x); + if (_supportsBigInt) + AddUnary(op, typeof(BigInteger), x => +(BigInteger)x); + + op = ExpressionType.Negate; + AddUnary(op, typeof(sbyte), x => -(sbyte)x); + AddUnary(op, typeof(byte), x => -(byte)x); + AddUnary(op, typeof(Int16), x => -(Int16)x); + AddUnary(op, typeof(UInt16), x => -(UInt16)x); + AddUnary(op, typeof(Int32), x => -(Int32)x); + AddUnary(op, typeof(UInt32), x => -(UInt32)x); + AddUnary(op, typeof(Int64), x => -(Int64)x); + AddUnary(op, typeof(Single), x => -(Single)x); + AddUnary(op, typeof(double), x => -(double)x); + AddUnary(op, typeof(decimal), x => -(decimal)x); + if (_supportsBigInt) + AddUnary(op, typeof(BigInteger), x => -(BigInteger)x); + if (_supportsComplex) + AddUnary(op, typeof(Complex), x => -(Complex)x); + + op = ExpressionType.Not; 
+ AddUnary(op, typeof(bool), x => !(bool)x); + AddUnary(op, typeof(sbyte), x => ~(sbyte)x); + AddUnary(op, typeof(byte), x => ~(byte)x); + AddUnary(op, typeof(Int16), x => ~(Int16)x); + AddUnary(op, typeof(UInt16), x => ~(UInt16)x); + AddUnary(op, typeof(Int32), x => ~(Int32)x); + AddUnary(op, typeof(UInt32), x => ~(UInt32)x); + AddUnary(op, typeof(Int64), x => ~(Int64)x); + }

// Generates binary implementations for mismatched argument types.
// Gathers every first-argument type and every binary operator currently present in
// OperatorImplementations, then calls CreateBinaryOperatorImplementation for each
// (operator, arg1Type, arg2Type) combination in which the two types differ.
public virtual void CreateBinaryOperatorImplementationsForMismatchedTypes()
{
    // Collect the known types and binary operators into separate sets first; the nested loops
    // below then run over these sets rather than over OperatorImplementations itself.
    var allTypes = new HashSet<Type>();
    var allBinOps = new HashSet<ExpressionType>();
    foreach (var kv in OperatorImplementations)
    {
        allTypes.Add(kv.Key.Arg1Type);
        // entries without a BaseBinaryMethod are unary operators or type converters
        if (kv.Value.BaseBinaryMethod != null)
            allBinOps.Add(kv.Key.Op);
    }
    foreach (var arg1Type in allTypes)
        foreach (var arg2Type in allTypes)
            if (arg1Type != arg2Type)
                foreach (ExpressionType op in allBinOps)
                    CreateBinaryOperatorImplementation(op, arg1Type, arg2Type);
}//method

// Creates a binary implementation for an operator with mismatched argument types.
// Determines common type, retrieves implementation for operator with both args of common type, then creates
// implementation for mismatched types using type converters (by converting to common type).
// Returns null when no common type can be determined, or when neither the common type nor its
// up-type has a same-type implementation registered for the operator.
public OperatorImplementation CreateBinaryOperatorImplementation(ExpressionType op, Type arg1Type, Type arg2Type)
{
    var operandType = GetCommonTypeForOperator(op, arg1Type, arg2Type);
    if (operandType == null)
        return null;

    var sameTypeImpl = FindBaseImplementation(op, operandType);
    if (sameTypeImpl == null)
    {
        // Nothing registered for the common type itself - retry once with its up-type
        operandType = GetUpType(operandType);
        if (operandType == null)
            return null;
        sameTypeImpl = FindBaseImplementation(op, operandType);
        if (sameTypeImpl == null)
            return null;
    }

    // Build the mixed-type implementation and store it in the implementations table under its own key
    var mixedImpl = CreateBinaryOperatorImplementation(
        op, arg1Type, arg2Type, operandType, sameTypeImpl.BaseBinaryMethod, sameTypeImpl.ResultConverter);
    OperatorImplementations[mixedImpl.Key] = mixedImpl;
    return mixedImpl;
}

// Builds (but does not register) an implementation for the given argument types that runs "method"
// on operands converted to commonType. An argument whose type already equals commonType gets no
// converter (null); for the other one the converter is whatever GetConverter returns.
protected virtual OperatorImplementation CreateBinaryOperatorImplementation(ExpressionType op, Type arg1Type, Type arg2Type,
    Type commonType, BinaryOperatorMethod method, UnaryOperatorMethod resultConverter)
{
    var dispatchKey = new OperatorDispatchKey(op, arg1Type, arg2Type);

    UnaryOperatorMethod leftConverter = null;
    if (arg1Type != commonType)
        leftConverter = GetConverter(arg1Type, commonType);

    UnaryOperatorMethod rightConverter = null;
    if (arg2Type != commonType)
        rightConverter = GetConverter(arg2Type, commonType);

    return new OperatorImplementation(
        dispatchKey, commonType, method, leftConverter, rightConverter, resultConverter);
}

// Creates overflow handlers. For each implementation, checks if operator can overflow;
// if yes, creates and sets an overflow handler - another implementation that performs
// operation using "upper" type that wouldn't overflow.
// For ex: (int * int) has overflow handler (int64 * int64)
protected virtual void CreateOverflowHandlers()
{
    foreach (var candidate in OperatorImplementations.Values)
    {
        if (!CanOverflow(candidate))
            continue;

        var widerType = GetUpType(candidate.CommonType);
        if (widerType == null)
            continue;

        var dispatch = candidate.Key;
        var widerImpl = FindBaseImplementation(dispatch.Op, widerType);
        if (widerImpl == null)
            continue;

        // The handler is only attached to the implementation it backs up.
        // Do not put it into the OperatorImplementations table! - it would override some other, non-overflow impl
        candidate.OverflowHandler = CreateBinaryOperatorImplementation(
            dispatch.Op, dispatch.Arg1Type, dispatch.Arg2Type, widerType,
            widerImpl.BaseBinaryMethod, widerImpl.ResultConverter);
    }
}

// Looks up the implementation registered for an operator with both operands of commonType;
// returns null when there is none.
private OperatorImplementation FindBaseImplementation(ExpressionType op, Type commonType)
{
    var sameTypeKey = new OperatorDispatchKey(op, commonType, commonType);
    return OperatorImplementations.TryGetValue(sameTypeKey, out var found) ? found : null;
}

// Returns an identity converter when fromType == toType, the converter registered for the
// (fromType, toType) pair when there is one, and null otherwise.
public virtual UnaryOperatorMethod GetConverter(Type fromType, Type toType)
{
    if (fromType == toType)
        return value => value;

    var converterKey = new OperatorDispatchKey(ExpressionType.ConvertChecked, fromType, toType);
    if (OperatorImplementations.TryGetValue(converterKey, out var registered))
        return registered.Arg1Converter;
    return null;
}

#endregion

#region Utilities

// Decides whether an implementation needs an overflow handler: the operator must be one of the
// overflow operators, and the operation must not be one of the cases excluded below.
private static bool CanOverflow(OperatorImplementation impl)
{
    if (!CanOverflow(impl.Key.Op))
        return false;

    var operandType = impl.CommonType;

    // two "small" integers computed in Int32
    var smallIntsInInt32 = operandType == typeof(Int32)
        && IsSmallInt(impl.Key.Arg1Type)
        && IsSmallInt(impl.Key.Arg2Type);
    if (smallIntsInInt32)
        return false;

    // floating-point and BigInteger operations are excluded as well
    return operandType != typeof(double)
        && operandType != typeof(Single)
        && operandType != typeof(BigInteger);
}

// True when the operator is listed in _overflowOperators.
private static bool CanOverflow(ExpressionType expression) => _overflowOperators.Contains(expression);

// True for byte, sbyte, Int16 and UInt16.
private static bool IsSmallInt(Type type) =>
    type == typeof(byte) || type == typeof(sbyte) || type == typeof(Int16) || type == typeof(UInt16);

/// <summary>
/// Returns the type to which arguments should be converted to perform the operation
/// for a given operator and arguments types.
/// </summary>
/// <param name="op">Operator; not consulted by this default implementation.</param>
/// <param name="argType1">The type of the first argument.</param>
/// <param name="argType2">The type of the second argument.</param>
/// <returns>A common type for operation, or null if either type is not in the types sequence.</returns>
protected virtual Type GetCommonTypeForOperator(ExpressionType op, Type argType1, Type argType2)
{
    if (argType1 == argType2)
        return argType1;

    //TODO: see how to handle properly null/NoneValue in expressions
    // var noneType = typeof(NoneClass);
    // if (argType1 == noneType || argType2 == noneType) return noneType;

    // Unsigned types are first replaced by their signed counterparts; after that
    // the type with the higher index in _typesSequence is the common type
    var rank1 = _typesSequence.IndexOf(GetSignedTypeForUnsigned(argType1));
    var rank2 = _typesSequence.IndexOf(GetSignedTypeForUnsigned(argType2));
    if (rank1 < 0 || rank2 < 0)
        return null; // some custom type that is not in the sequence - no common type

    return _typesSequence[Math.Max(rank1, rank2)];
}//method

// If a type is one of "unsigned" int types, returns the signed type used in its place;
// any other type is returned unchanged.
protected virtual Type GetSignedTypeForUnsigned(Type type)
{
    if (!_unsignedTypes.Contains(type))
        return type;
    if (type == typeof(byte) || type == typeof(UInt16))
        return typeof(int);
    if (type == typeof(UInt32) || type == typeof(UInt64))
        return typeof(Int64); // UInt64: let's remain in Int64
    return typeof(BigInteger);
}

///
/// Returns the "up-type" to use in operation instead of the type that caused overflow.
///
/// The base type for operation that caused overflow.
/// The type to use for operation.
///
/// Can be overwritten in language implementation to implement different type-conversion policy.
+ /// + protected virtual Type GetUpType(Type type) + { + // In fact we do not need to care about unsigned types - they are eliminated from common types for operations, + // so "type" parameter can never be unsigned type. But just in case... + if (_unsignedTypes.Contains(type)) + return GetSignedTypeForUnsigned(type); //it will return "upped" type in fact + if (type == typeof(byte) || type == typeof(sbyte) || type == typeof(UInt16) || type == typeof(Int16)) + return typeof(int); + if (type == typeof(Int32)) + return typeof(Int64); + if (type == typeof(Int64)) + return typeof(BigInteger); + return null; + } + + //Note bool type at the end - if any of operands is of bool type, convert the other to bool as well + private static TypeList _typesSequence = new TypeList( + typeof(sbyte), typeof(Int16), typeof(Int32), typeof(Int64), typeof(BigInteger), // typeof(Rational) + typeof(Single), typeof(Double), typeof(Complex), + typeof(bool), typeof(char), typeof(string) + ); + + private static TypeList _unsignedTypes = new TypeList( + typeof(byte), typeof(UInt16), typeof(UInt32), typeof(UInt64) + ); + + #endregion + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/LanguageRuntime/NoneClass.cs b/src/Irony.Interpreter/LanguageRuntime/NoneClass.cs new file mode 100644 index 0000000..52156ee --- /dev/null +++ b/src/Irony.Interpreter/LanguageRuntime/NoneClass.cs @@ -0,0 +1,39 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter +{ + // A class for special reserved None value used in many scripting languages. + public class NoneClass + { + private string _toString; + + private NoneClass() + { + _toString = Resources.LabelNone; + } + + public NoneClass(string toString) + { + _toString = toString; + } + + public override string ToString() + { + return _toString; + } + + public static NoneClass Value = new NoneClass(); + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/LanguageRuntime/OperatorImplementation.cs b/src/Irony.Interpreter/LanguageRuntime/OperatorImplementation.cs new file mode 100644 index 0000000..db2a557 --- /dev/null +++ b/src/Irony.Interpreter/LanguageRuntime/OperatorImplementation.cs @@ -0,0 +1,244 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using System.Linq.Expressions; + +namespace Sanchime.Irony.Interpreter +{ + public delegate object UnaryOperatorMethod(object arg); + + public delegate object BinaryOperatorMethod(object arg1, object arg2); + + #region OperatorDispatchKey class + + /// + /// The struct is used as a key for the dictionary of operator implementations. + /// Contains types of arguments for a method or operator implementation. 
///
public struct OperatorDispatchKey : IEquatable<OperatorDispatchKey>
{
    // Shared comparer instance; the dictionary tables declared below pass it to their base constructor.
    public static readonly OperatorDispatchKeyComparer Comparer = new OperatorDispatchKeyComparer();

    public readonly ExpressionType Op;
    public readonly Type Arg1Type;

    // null for keys created with the unary constructor
    public readonly Type Arg2Type;

    // Computed once in the constructor and returned by GetHashCode()
    public readonly int HashCode;

    //For binary operators
    public OperatorDispatchKey(ExpressionType op, Type arg1Type, Type arg2Type)
    {
        Op = op;
        Arg1Type = arg1Type;
        Arg2Type = arg2Type;
        int h0 = (int)Op;
        int h1 = Arg1Type.GetHashCode();
        int h2 = Arg2Type.GetHashCode();
        HashCode = unchecked(h0 << 8 ^ h1 << 4 ^ h2);
    }

    //For unary operators
    public OperatorDispatchKey(ExpressionType op, Type arg1Type)
    {
        Op = op;
        Arg1Type = arg1Type;
        Arg2Type = null;
        int h0 = (int)Op;
        int h1 = Arg1Type.GetHashCode();
        int h2 = 0;
        HashCode = unchecked(h0 << 8 ^ h1 << 4 ^ h2);
    }

    public override int GetHashCode()
    {
        return HashCode;
    }

    // Equality counterpart of GetHashCode(): delegates to the shared comparer so that keys compare
    // the same way whether or not a dictionary was given the explicit comparer.
    public bool Equals(OperatorDispatchKey other)
    {
        return Comparer.Equals(this, other);
    }

    public override bool Equals(object obj)
    {
        return obj is OperatorDispatchKey other && Equals(other);
    }

    public override string ToString()
    {
        return Op + "(" + Arg1Type + ", " + Arg2Type + ")";
    }
}//class

#endregion

#region OperatorDispatchKeyComparer class

// Note: I believe (guess) that a custom Comparer provided to a Dictionary is a bit more efficient
// than implementing IComparable on the key itself
public class OperatorDispatchKeyComparer : IEqualityComparer<OperatorDispatchKey>
{
    // Two keys are equal when the cached hash, the operator and both argument types all match.
    public bool Equals(OperatorDispatchKey x, OperatorDispatchKey y)
    {
        return x.HashCode == y.HashCode && x.Op == y.Op && x.Arg1Type == y.Arg1Type && x.Arg2Type == y.Arg2Type;
    }

    public int GetHashCode(OperatorDispatchKey obj)
    {
        return obj.HashCode;
    }
}//class

#endregion

// Dictionary keyed by OperatorDispatchKey and compared with OperatorDispatchKey.Comparer.
// NOTE(review): the generic arguments were missing from this declaration; the value type is
// restored as UnaryOperatorMethod (the delegate type used for converters) - confirm.
public class TypeConverterTable : Dictionary<OperatorDispatchKey, UnaryOperatorMethod>
{
    public TypeConverterTable(int capacity) : base(capacity, OperatorDispatchKey.Comparer)
    {
    }
}//class

// Dictionary from OperatorDispatchKey to OperatorImplementation, compared with OperatorDispatchKey.Comparer.
public class OperatorImplementationTable : Dictionary<OperatorDispatchKey, OperatorImplementation>
{
    public OperatorImplementationTable(int capacity) : base(capacity, OperatorDispatchKey.Comparer)
    {
    }
}

///
///The OperatorImplementation class represents an
implementation of an operator for specific argument types. + /// + /// + /// The OperatorImplementation is used for holding implementation for binary operators, unary operators, + /// and type converters (special case of unary operators) + /// it holds 4 method references for binary operators: + /// converters for both arguments, implementation method and converter for the result. + /// For unary operators (and type converters) the implementation is in Arg1Converter + /// operator (arg1 is used); the converter method is stored in Arg1Converter; the target type is in CommonType + /// + public sealed class OperatorImplementation + { + public readonly OperatorDispatchKey Key; + + // The type to which arguments are converted and no-conversion method for this type. + public readonly Type CommonType; + + public readonly BinaryOperatorMethod BaseBinaryMethod; + + //converters + internal UnaryOperatorMethod Arg1Converter; + + internal UnaryOperatorMethod Arg2Converter; + internal UnaryOperatorMethod ResultConverter; + + //A reference to the actual binary evaluator method - one of EvaluateConvXXX + public BinaryOperatorMethod EvaluateBinary; + + // An overflow handler - the implementation to handle arithmetic overflow + public OperatorImplementation OverflowHandler; + + // No-box counterpart for implementations with auto-boxed output. 
If this field <> null, then this is + // implementation with auto-boxed output + public OperatorImplementation NoBoxImplementation; + + //Constructor for binary operators + public OperatorImplementation(OperatorDispatchKey key, Type resultType, BinaryOperatorMethod baseBinaryMethod, + UnaryOperatorMethod arg1Converter, UnaryOperatorMethod arg2Converter, UnaryOperatorMethod resultConverter) + { + Key = key; + CommonType = resultType; + Arg1Converter = arg1Converter; + Arg2Converter = arg2Converter; + ResultConverter = resultConverter; + BaseBinaryMethod = baseBinaryMethod; + SetupEvaluationMethod(); + } + + //Constructor for unary operators and type converters + public OperatorImplementation(OperatorDispatchKey key, Type type, UnaryOperatorMethod method) + { + Key = key; + CommonType = type; + Arg1Converter = method; + Arg2Converter = null; + ResultConverter = null; + BaseBinaryMethod = null; + } + + public override string ToString() + { + return "[OpImpl for " + Key.ToString() + "]"; + } + + public void SetupEvaluationMethod() + { + if (BaseBinaryMethod == null) + //special case - it is unary method, the method itself in Arg1Converter; LanguageRuntime.ExecuteUnaryOperator will handle this properly + return; + // Binary operator + if (ResultConverter == null) + { + //without ResultConverter + if (Arg1Converter == null && Arg2Converter == null) + EvaluateBinary = EvaluateConvNone; + else if (Arg1Converter != null && Arg2Converter == null) + EvaluateBinary = EvaluateConvLeft; + else if (Arg1Converter == null && Arg2Converter != null) + EvaluateBinary = EvaluateConvRight; + else // if (Arg1Converter != null && arg2Converter != null) + EvaluateBinary = EvaluateConvBoth; + } + else + { + //with result converter + if (Arg1Converter == null && Arg2Converter == null) + EvaluateBinary = EvaluateConvNoneConvResult; + else if (Arg1Converter != null && Arg2Converter == null) + EvaluateBinary = EvaluateConvLeftConvResult; + else if (Arg1Converter == null && Arg2Converter != 
null) + EvaluateBinary = EvaluateConvRightConvResult; + else // if (Arg1Converter != null && Arg2Converter != null) + EvaluateBinary = EvaluateConvBothConvResult; + } + } + + private object EvaluateConvNone(object arg1, object arg2) + { + return BaseBinaryMethod(arg1, arg2); + } + + private object EvaluateConvLeft(object arg1, object arg2) + { + return BaseBinaryMethod(Arg1Converter(arg1), arg2); + } + + private object EvaluateConvRight(object arg1, object arg2) + { + return BaseBinaryMethod(arg1, Arg2Converter(arg2)); + } + + private object EvaluateConvBoth(object arg1, object arg2) + { + return BaseBinaryMethod(Arg1Converter(arg1), Arg2Converter(arg2)); + } + + private object EvaluateConvNoneConvResult(object arg1, object arg2) + { + return ResultConverter(BaseBinaryMethod(arg1, arg2)); + } + + private object EvaluateConvLeftConvResult(object arg1, object arg2) + { + return ResultConverter(BaseBinaryMethod(Arg1Converter(arg1), arg2)); + } + + private object EvaluateConvRightConvResult(object arg1, object arg2) + { + return ResultConverter(BaseBinaryMethod(arg1, Arg2Converter(arg2))); + } + + private object EvaluateConvBothConvResult(object arg1, object arg2) + { + return ResultConverter(BaseBinaryMethod(Arg1Converter(arg1), Arg2Converter(arg2))); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/LanguageRuntime/SpecialFormsLibrary.cs b/src/Irony.Interpreter/LanguageRuntime/SpecialFormsLibrary.cs new file mode 100644 index 0000000..01a85cc --- /dev/null +++ b/src/Irony.Interpreter/LanguageRuntime/SpecialFormsLibrary.cs @@ -0,0 +1,14 @@ +namespace Sanchime.Irony.Interpreter +{ + public delegate object SpecialForm(ScriptThread thread, AstNode[] childNodes); + + public static class SpecialFormsLibrary + { + public static object Iif(ScriptThread thread, AstNode[] childNodes) + { + var testValue = childNodes[0].Evaluate(thread); + object result = thread.Runtime.IsTrue(testValue) ? 
childNodes[1].Evaluate(thread) : childNodes[2].Evaluate(thread); + return result; + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Sanchime.Irony.Interpreter.csproj b/src/Irony.Interpreter/Sanchime.Irony.Interpreter.csproj new file mode 100644 index 0000000..5e27008 --- /dev/null +++ b/src/Irony.Interpreter/Sanchime.Irony.Interpreter.csproj @@ -0,0 +1,25 @@ + + + + netstandard2.0 + Sanchime.Irony.Interpreter.NetCore + 10.0 + enable + Irony.NetCore is a .NET Core compatible version of the Irony framework initially developed and maintained by Roman Ivantsov. Irony is a development kit for implementing languages on .NET platform. In Irony the target language grammar is coded directly in c# using operator overloading to express grammar constructs. Irony's scanner and parser modules use the grammar encoded as c# class to control the parsing process. + LICENSE + irony;parser + Github + + + + + + + + + True + + + + + diff --git a/src/Irony.Interpreter/Scopes/AppDataMap.cs b/src/Irony.Interpreter/Scopes/AppDataMap.cs new file mode 100644 index 0000000..b2cbdab --- /dev/null +++ b/src/Irony.Interpreter/Scopes/AppDataMap.cs @@ -0,0 +1,45 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Scopes +{ + /// Represents a set of all of static scopes/modules in the application. 
+ public class AppDataMap + { + public AstNode ProgramRoot; //artificial root associated with MainModule + public ScopeInfoList StaticScopeInfos = new ScopeInfoList(); + public ModuleInfoList Modules = new ModuleInfoList(); + public ModuleInfo MainModule; + public readonly bool LanguageCaseSensitive; + + public AppDataMap(bool languageCaseSensitive, AstNode programRoot = null) + { + LanguageCaseSensitive = languageCaseSensitive; + ProgramRoot = programRoot ?? new AstNode(); + var mainScopeInfo = new ScopeInfo(ProgramRoot, LanguageCaseSensitive); + StaticScopeInfos.Add(mainScopeInfo); + mainScopeInfo.StaticIndex = 0; + MainModule = new ModuleInfo("main", "main", mainScopeInfo); + Modules.Add(MainModule); + } + + public ModuleInfo GetModule(AstNode moduleNode) + { + foreach (var m in Modules) + if (m.ScopeInfo == moduleNode.DependentScopeInfo) + return m; + return null; + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Scopes/ModuleInfo.cs b/src/Irony.Interpreter/Scopes/ModuleInfo.cs new file mode 100644 index 0000000..698fd11 --- /dev/null +++ b/src/Irony.Interpreter/Scopes/ModuleInfo.cs @@ -0,0 +1,40 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Scopes +{ + public class ModuleInfoList : List + { } + + public class ModuleInfo + { + public readonly string Name; + public readonly string FileName; + public readonly ScopeInfo ScopeInfo; //scope for module variables + public readonly BindingSourceList Imports = new BindingSourceList(); + + public ModuleInfo(string name, string fileName, ScopeInfo scopeInfo) + { + Name = name; + FileName = fileName; + ScopeInfo = scopeInfo; + } + + //Used for imported modules + public Binding BindToExport(BindingRequest request) + { + return null; + } + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Scopes/Scope.cs b/src/Irony.Interpreter/Scopes/Scope.cs new file mode 100644 index 0000000..3b71f4a --- /dev/null +++ b/src/Irony.Interpreter/Scopes/Scope.cs @@ -0,0 +1,74 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Scopes +{ + public class Scope : ScopeBase + { + public object[] Parameters; + public Scope Caller; + public Scope Creator; //either caller or closure parent + private Scope _parent; //computed on demand + + public Scope(ScopeInfo scopeInfo, Scope caller, Scope creator, object[] parameters) : base(scopeInfo) + { + Caller = caller; + Creator = creator; + Parameters = parameters; + } + + public object[] GetParameters() + { + return Parameters; + } + + public object GetParameter(int index) + { + return Parameters[index]; + } + + public void SetParameter(int index, object value) + { + Parameters[index] = value; + } + + // Lexical parent, computed on demand + public Scope Parent + { + get + { + if (_parent == null) + _parent = GetParent(); + return _parent; + } + set { _parent = value; } + } + + protected Scope GetParent() + { + // Walk along creators chain and find a scope with ScopeInfo matching this.ScopeInfo.Parent + var parentScopeInfo = Info.Parent; + if (parentScopeInfo == null) + return null; + var current = Creator; + while (current != null) + { + if (current.Info == parentScopeInfo) + return current; + current = current.Creator; + } + return null; + }// method + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Scopes/ScopeBase.cs b/src/Irony.Interpreter/Scopes/ScopeBase.cs new file mode 100644 index 0000000..0fdb8b9 --- /dev/null +++ b/src/Irony.Interpreter/Scopes/ScopeBase.cs @@ -0,0 +1,121 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. 
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Scopes +{ + public class ScopeBase + { + public ScopeInfo Info; + public volatile object[] Values; + + public ScopeBase(ScopeInfo scopeInfo) : this(scopeInfo, null) + { + } + + public ScopeBase(ScopeInfo scopeInfo, object[] values) + { + Info = scopeInfo; + Values = values; + if (Values == null) + Values = new object[scopeInfo.ValuesCount]; + } + + public SlotInfo AddSlot(string name) + { + var slot = Info.AddSlot(name, SlotType.Value); + if (slot.Index >= Values.Length) + Resize(Values.Length + 4); + return slot; + } + + public object[] GetValues() + { + return Values; + } + + public object GetValue(int index) + { + try + { + var tmp = Values; + // The following line may throw null-reference exception (tmp==null), if resizing is happening at the same time + // It may also throw IndexOutOfRange exception if new variable was added by another thread in another frame(scope) + // but this scope and Values array were created before that, so Values is shorter than #slots in SlotInfo. + // But in this case, it does not matter, result value is null (unassigned) + return tmp[index]; + } + catch (NullReferenceException) + { + Thread.Sleep(0); // Silverlight does not have Thread.Yield; + // Thread.Yield(); // maybe SpinWait.SpinOnce? + return GetValue(index); //repeat attempt + } + catch (IndexOutOfRangeException) + { + return null; //we do not resize here, value is unassigned anyway. 
+ } + }//method + + public void SetValue(int index, object value) + { + try + { + var tmp = Values; + // The following line may throw null-reference exception (tmp==null), if resizing is happening at the same time + // It may also throw IndexOutOfRange exception if new variable was added by another thread in another frame(scope) + // but this scope and Values array were created before that, so Values is shorter than #slots in SlotInfo + tmp[index] = value; + //Now check that tmp is the same as Values - if not, then resizing happened in the middle, + // so repeat assignment to make sure the value is in resized array. + if (tmp != Values) + SetValue(index, value); // do it again + } + catch (NullReferenceException) + { + Thread.Sleep(0); // it's OK to Sleep intead of SpinWait - it is really rare event, so we don't care losing a few more cycles here. + SetValue(index, value); //repeat it again + } + catch (IndexOutOfRangeException) + { + Resize(Info.GetSlotCount()); + SetValue(index, value); //repeat it again + } + }//method + + // Disabling warning: 'Values: a reference to a volatile field will not be treated as volatile' + // According to MSDN for CS0420 warning (see http://msdn.microsoft.com/en-us/library/4bw5ewxy.aspx), + // this does NOT apply to Interlocked API - which we use here. 
+ + protected void Resize(int newSize) + { + lock (Info.LockObject) + { + if (Values.Length >= newSize) return; + object[] tmp = Interlocked.Exchange(ref Values, null); + Array.Resize(ref tmp, newSize); + Interlocked.Exchange(ref Values, tmp); + } + } + + public IDictionary AsDictionary() + { + return new ScopeValuesDictionary(this); + } + + public override string ToString() + { + return Info.ToString(); + } + }//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Scopes/ScopeInfo.cs b/src/Irony.Interpreter/Scopes/ScopeInfo.cs new file mode 100644 index 0000000..eaa787b --- /dev/null +++ b/src/Irony.Interpreter/Scopes/ScopeInfo.cs @@ -0,0 +1,127 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Scopes +{ + public class ScopeInfoList : List + { } + + /// Describes all variables (locals and parameters) defined in a scope of a function or module. + /// ScopeInfo is metadata, it does not contain variable values. The Scope object (described by ScopeInfo) is a container for values. + // Note that all access to SlotTable is done through "lock" operator, so it's thread safe + public class ScopeInfo + { + public int ValuesCount, ParametersCount; + public AstNode OwnerNode; //might be null + + // Static/singleton scopes only; for ex, modules are singletons. 
Index in App.StaticScopes array + public int StaticIndex = -1; + + public int Level; + public readonly string AsString; + public Scope ScopeInstance; //Experiment: reusable scope instance; see ScriptThread.cs class + + private SlotInfoDictionary _slots; + protected internal object LockObject = new object(); + + public ScopeInfo(AstNode ownerNode, bool caseSensitive) + { + OwnerNode = ownerNode ?? throw new Exception("ScopeInfo owner node may not be null."); + _slots = new SlotInfoDictionary(caseSensitive); + Level = Parent == null ? 0 : Parent.Level + 1; + var sLevel = "level=" + Level; + AsString = OwnerNode == null ? sLevel : OwnerNode.AsString + ", " + sLevel; + } + + //Lexical parent + public ScopeInfo Parent + { + get + { + if (_parent == null) + _parent = GetParent(); + return _parent; + } + } + + private ScopeInfo _parent; + + public ScopeInfo GetParent() + { + if (OwnerNode == null) return null; + var currentParent = OwnerNode.Parent; + while (currentParent != null) + { + var result = currentParent.DependentScopeInfo; + if (result != null) return result; + currentParent = currentParent.Parent; + } + return null; //should never happen + } + + #region Slot operations + + public SlotInfo AddSlot(string name, SlotType type) + { + lock (LockObject) + { + var index = type == SlotType.Value ? ValuesCount++ : ParametersCount++; + var slot = new SlotInfo(this, type, name, index); + _slots.Add(name, slot); + return slot; + } + } + + //Returns null if slot not found. 
+ public SlotInfo GetSlot(string name) + { + lock (LockObject) + { + _slots.TryGetValue(name, out var slot); + return slot; + } + } + + public IList GetSlots() + { + lock (LockObject) + { + return new List(_slots.Values); + } + } + + public IList GetNames() + { + lock (LockObject) + { + return new List(_slots.Keys); + } + } + + public int GetSlotCount() + { + lock (LockObject) + { + return _slots.Count; + } + } + + #endregion + + public override string ToString() + { + return AsString; + } + }//class +} //namespace \ No newline at end of file diff --git a/src/Irony.Interpreter/Scopes/ScopeValuesDictionary.cs b/src/Irony.Interpreter/Scopes/ScopeValuesDictionary.cs new file mode 100644 index 0000000..689f4ff --- /dev/null +++ b/src/Irony.Interpreter/Scopes/ScopeValuesDictionary.cs @@ -0,0 +1,132 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Scopes +{ + /// + /// A wrapper around Scope exposing it as a string-object dictionary. 
Used to expose Globals dictionary from Main scope + /// + public class ScopeValuesDictionary : IDictionary + { + private ScopeBase _scope; + + internal ScopeValuesDictionary(ScopeBase scope) + { + _scope = scope; + } + + public void Add(string key, object value) + { + var slot = _scope.Info.GetSlot(key); + if (slot == null) + slot = _scope.AddSlot(key); + _scope.SetValue(slot.Index, value); + } + + public bool ContainsKey(string key) + { + return _scope.Info.GetSlot(key) != null; + } + + public ICollection Keys + { + get { return _scope.Info.GetNames(); } + } + + //We do not remove the slotInfo (you can't do that, slot set can only grow); instead we set the value to null + // to indicate "unassigned" + public bool Remove(string key) + { + this[key] = null; + return true; + } + + public bool TryGetValue(string key, out object value) + { + value = null; + SlotInfo slot = _scope.Info.GetSlot(key); + if (slot == null) + return false; + value = _scope.GetValue(slot.Index); + return true; + } + + public ICollection Values + { + get { return _scope.GetValues(); } + } + + public object this[string key] + { + get + { + TryGetValue(key, out object value); + return value; + } + set + { + Add(key, value); + } + } + + public void Add(KeyValuePair item) + { + Add(item.Key, item.Value); + } + + public void Clear() + { + var values = _scope.GetValues(); + for (int i = 0; i < values.Length; i++) + values[i] = null; + } + + public bool Contains(KeyValuePair item) + { + return _scope.Info.GetSlot(item.Key) != null; + } + + public void CopyTo(KeyValuePair[] array, int arrayIndex) + { + throw new NotImplementedException(); + } + + public int Count + { + get { return _scope.Info.GetSlotCount(); } + } + + public bool IsReadOnly + { + get { return true; } + } + + public bool Remove(KeyValuePair item) + { + return Remove(item.Key); + } + + public IEnumerator> GetEnumerator() + { + var slots = _scope.Info.GetSlots(); //make local copy + foreach (var slot in slots) + yield return new 
KeyValuePair(slot.Name, _scope.GetValue(slot.Index)); + } + + System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Scopes/SlotInfo.cs b/src/Irony.Interpreter/Scopes/SlotInfo.cs new file mode 100644 index 0000000..c7d3e2a --- /dev/null +++ b/src/Irony.Interpreter/Scopes/SlotInfo.cs @@ -0,0 +1,48 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.Scopes +{ + public enum SlotType + { + Value, //local or property value + Parameter, //function parameter + Function, + Closure, + } + + /// Describes a variable. + public class SlotInfo + { + public readonly ScopeInfo ScopeInfo; + public readonly SlotType Type; + public readonly string Name; + public readonly int Index; + public bool IsPublic = true; //for module-level slots, indicator that symbol is "exported" and visible by code that imports the module + + internal SlotInfo(ScopeInfo scopeInfo, SlotType type, string name, int index) + { + ScopeInfo = scopeInfo; + Type = type; + Name = name; + Index = index; + } + } + + public class SlotInfoDictionary : Dictionary + { + public SlotInfoDictionary(bool caseSensitive) + : base(32, caseSensitive ? 
StringComparer.Ordinal : StringComparer.OrdinalIgnoreCase) { } + } +} \ No newline at end of file diff --git a/src/Irony.Interpreter/Scopes/_about_storage.txt b/src/Irony.Interpreter/Scopes/_about_storage.txt new file mode 100644 index 0000000..862017d --- /dev/null +++ b/src/Irony.Interpreter/Scopes/_about_storage.txt @@ -0,0 +1,84 @@ +Variables/values storage - some traditional approaches + +1. In scripting languages, the data elements (fields, properties, local variables) are created on the fly, on the first write. There's no pre-allocation at compile time. The set of variables is unknown in advance. Traditional, straightforward solution is to use a dictionary (string=>object) to store local variables of a function, or module-level variables. +2. In free-threaded environment the variables may be accessed from different threads. This means that the access to containing dictionaries should be performed using thread-locking mechanism, guaranteeing that only a single thread is accessing a dictionary. +So the script statement like: + + x = 5 (1) + +is translated into something like this: + + lock(scope) { (2) + scope.Values["x"] = 5; + } + +Two important and unfortunate observations. +1. Dictionary access is slow. At least if we mean .NET Dictionary generic class. Simple tests coupled with source code inspection show that the cost is in the range of hundreds of processor instructions. +2. Thread locking is slow. The cost is also in the range of hundreds of instructions. + +The result is that implementation (2) is quite slow. Really slow, especially considering the fact that actual thread collisions on the same dictionary objects are quite rare, while we have to incur the extra cost of lock every time we read or write a value. + +What can be done better? +Our interpreter stores data in linear arrays, and does NOT use thread locking when reading/writing the data.
There is an explicit locking when we "create" a variable for the first time - we lock meta-data dictionary containing "descriptions" of data slots; but then all subsequent accesses to the value are performed using the variable index. +But before we explain how it works, we need to state one explicit assumption we rely on: + + Assumption: + Assignment of an object reference to a variable (ex: x = someObj) is an atomic operation and is "thread-safe". + +So the assignment can be safely done without thread locking. If one thread makes an assignment, and the other thread reads the reference, this other thread would see either old or new value, but never any "corrupted middle". This assumption mostly concerns safety of Reading from another thread. A special case is writing or replacing the value (when we resize the Values array, we replace it with new resized copy) - see more on this below. + +Back to Irony's data storage implementation: arrays with no-lock read/write access. The data is stored in linear array of objects: Values[] field (see ScopeBase class). The field is marked with "volatile" keyword. All access is done by index. + +Let's look at an example and explain what happens. Suppose we have an AST node that represents a variable "x" with READ access. When interpreter evaluates this node for the first time, it looks up a variable metadata (SlotInfo) in current scope metadata (ScopeInfo). The result is linear index of the data value in Values array. It then reads the value using the index: + + vx = scope.Values[xSlotIndex]; + +All later evaluations will do the same - lookup by index but without looking up the SlotInfo: the xSlotIndex is cached in the node (more accurately, in SlotBinding object). Writing the value works the same way - the array element is assigned by index.
+The problem comes when we need to resize the Values array because we are adding some local variable - for example, our script runs into new assignment statement in the local scope: + + y = 5 + +We need to add "y" to the list of slots (metadata), but then we also need to "extend" the Values[] array and add an extra element for "y". The question now is: how to resize Values array in such a way that if some other thread(s) is reading or writing other values in the same scope, it does it correctly even if it happens exactly at the moment when we resize the array? +Here's how we do it. First let's look at the ScopeBase.Resize method: + + #pragma warning disable 0420 + protected void Resize(int newSize) { + lock (Info.LockObject) { + if (Values.Length >= newSize) return; + object[] tmp = Interlocked.Exchange(ref Values, null); + Array.Resize(ref tmp, newSize); + Interlocked.Exchange(ref Values, tmp); + } + } + +We use Interlocked.Exchange to replace Values field with null as an atomic operation. We do it to force any concurrent reads/writes to fail, if they happen at exactly this time. Note that we disable a compiler warning stating that volatile field Values will not be treated as volatile in a call to Interlocked.Exchange. According to MSDN, this is usually the case with "by-ref" arguments, but Interlocked API is an exception, so we're OK here. +Now, in GetValue and SetValue methods, we expect this failure, and have a try/catch block to handle the null reference exception and retry the operation. Here's the SetValue method: + + public void SetValue(int index, object value) { + try { + var tmp = Values; + tmp[index] = value; + //Now check that tmp is the same as Values - if not, then resizing happened in the middle, + // so repeat assignment to make sure the value is in resized array. + if (tmp != Values) + SetValue(index, value); // do it again + } catch (NullReferenceException) { + Thread.Sleep(0); + SetValue(index, value); //repeat it again + } .....
+ }//method + + The "catch" block for NullReferenceException is for handling the situation when Values was null while another thread was resizing it. Remember that try/catch block is free, it does not add any executable commands if we run without exception. +There is an additional twist when writing a value. It might happen that after we copied the Values reference into tmp variable, some other thread resized the Values array - replacing it with a new extended array. As a result, we will be writing the value into a "dead" old array. To check against this, after we do the value change we check that "tmp" and "Values" reference the same object. If not, we got concurrent resize, so we repeat SetValue to make sure we set it in the new Values instance. + +To sum it up: all variables are stored in object arrays, values are accessed by index, and accessed without explicit thread locks. The net result of this technique (and some other improvements) is an approximately 5-fold performance gain - compared to old interpreter. + +NOW 5 TIMES FASTER! + +References: +Very illuminating article about low-lock memory access: +http://msdn.microsoft.com/en-us/magazine/cc163715.aspx + +Another article about spin locks and interlocked operations: +http://msdn.microsoft.com/en-us/magazine/cc163715.aspx + diff --git a/src/Irony.Interpreter/SriptApplication/CommandLine.cs b/src/Irony.Interpreter/SriptApplication/CommandLine.cs new file mode 100644 index 0000000..7a9b653 --- /dev/null +++ b/src/Irony.Interpreter/SriptApplication/CommandLine.cs @@ -0,0 +1,196 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License.
+ * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.SriptApplication +{ + //An abstraction of a Console. + public interface IConsoleAdaptor + { + bool Canceled { get; set; } + + void Write(string text); + + void WriteLine(string text); + + void SetTextStyle(ConsoleTextStyle style); + + int Read(); //reads a key + + string ReadLine(); //reads a line; returns null if Ctrl-C is pressed + + void SetTitle(string title); + } + + //WARNING: Ctrl-C for aborting running script does NOT work when you run console app from Visual Studio 2010. + // Run executable directly from bin folder. + //public class CommandLine { + // #region Fields and properties + // public readonly LanguageRuntime Runtime; + // public readonly IConsoleAdaptor _console; + // //Initialized from grammar + // public string Title; + // public string Greeting; + // public string Prompt; //default prompt + // public string PromptMoreInput; //prompt to show when more input is expected + + // public readonly ScriptApp App; + // Thread _workerThread; + // public bool IsEvaluating { get; private set; } + + // #endregion + + // public CommandLine(LanguageRuntime runtime, IConsoleAdaptor console = null) { + // Runtime = runtime; + // _console = console ?? 
new ConsoleAdapter(); + // var grammar = runtime.Language.Grammar; + // Title = grammar.ConsoleTitle; + // Greeting = grammar.ConsoleGreeting; + // Prompt = grammar.ConsolePrompt; + // PromptMoreInput = grammar.ConsolePromptMoreInput; + // App = new ScriptApp(Runtime); + // App.ParserMode = ParseMode.CommandLine; + // // App.PrintParseErrors = false; + // App.RethrowExceptions = false; + + // } + + // public void Run() { + // try { + // RunImpl(); + // } catch (Exception ex) { + // _console.SetTextStyle(ConsoleTextStyle.Error); + // _console.WriteLine(Resources.ErrConsoleFatalError); + // _console.WriteLine(ex.ToString()); + // _console.SetTextStyle(ConsoleTextStyle.Normal); + // _console.WriteLine(Resources.MsgPressAnyKeyToExit); + // _console.Read(); + // } + // } + + // private void RunImpl() { + // _console.SetTitle(Title); + // _console.WriteLine(Greeting); + // string input; + // while (true) { + // _console.Canceled = false; + // _console.SetTextStyle(ConsoleTextStyle.Normal); + // string prompt = (App.Status == AppStatus.WaitingMoreInput ? 
PromptMoreInput : Prompt); + + // //Write prompt, read input, check for Ctrl-C + // _console.Write(prompt); + // input = _console.ReadLine(); + // if (_console.Canceled) + // if (Confirm(Resources.MsgExitConsoleYN)) + // return; + // else + // continue; //from the start of the loop + + // //Execute + // App.ClearOutputBuffer(); + // EvaluateAsync(input); + // //Evaluate(input); + // WaitForScriptComplete(); + + // switch (App.Status) { + // case AppStatus.Ready: //success + // _console.WriteLine(App.GetOutput()); + // break; + // case AppStatus.SyntaxError: + // _console.WriteLine(App.GetOutput()); //write all output we have + // _console.SetTextStyle(ConsoleTextStyle.Error); + // foreach (var err in App.GetParserMessages()) { + // _console.WriteLine(string.Empty.PadRight(prompt.Length + err.Location.Column) + "^"); //show err location + // _console.WriteLine(err.Message); //print message + // } + // break; + // case AppStatus.Crash: + // case AppStatus.RuntimeError: + // ReportException(); + // break; + // default: break; + // }//switch + // } + + // }//Run method + + // private void WaitForScriptComplete() { + // _console.Canceled = false; + // while(true) { + // Thread.Sleep(50); + // if(!IsEvaluating) return; + // if(_console.Canceled) { + // _console.Canceled = false; + // if (Confirm(Resources.MsgAbortScriptYN)) + // WorkerThreadAbort(); + // }//if Canceled + // } + // } + + // private void Evaluate(string script) { + // try { + // IsEvaluating = true; + // App.Evaluate(script); + // } finally { + // IsEvaluating = false; + // } + // } + + // private void EvaluateAsync(string script) { + // IsEvaluating = true; + // _workerThread = new Thread(WorkerThreadStart); + // _workerThread.Start(script); + // } + + // private void WorkerThreadStart(object data) { + // try { + // var script = data as string; + // App.Evaluate(script); + // } finally { + // IsEvaluating = false; + // } + // } + // private void WorkerThreadAbort() { + // try { + // _workerThread.Abort(); 
+ // _workerThread.Join(50); + // } finally { + // IsEvaluating = false; + // } + // } + + // private bool Confirm(string message) { + // _console.WriteLine(string.Empty); + // _console.Write(message); + // var input = _console.ReadLine(); + // return Resources.ConsoleYesChars.Contains(input); + // } + + // private void ReportException() { + // _console.SetTextStyle(ConsoleTextStyle.Error); + // var ex = App.LastException; + // var scriptEx = ex as ScriptException; + // if (scriptEx != null) + // _console.WriteLine(scriptEx.Message + " " + Resources.LabelLocation + " " + scriptEx.Location.ToUiString()); + // else { + // if (App.Status == AppStatus.Crash) + // _console.WriteLine(ex.ToString()); //Unexpected interpreter crash: the full stack when debugging your language + // else + // _console.WriteLine(ex.Message); + + // } + // // + // } + + //}//class +} \ No newline at end of file diff --git a/src/Irony.Interpreter/SriptApplication/ConsoleAdaptor.cs b/src/Irony.Interpreter/SriptApplication/ConsoleAdaptor.cs new file mode 100644 index 0000000..e8b039b --- /dev/null +++ b/src/Irony.Interpreter/SriptApplication/ConsoleAdaptor.cs @@ -0,0 +1,83 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Interpreter.SriptApplication +{ + //WARNING: Ctrl-C for aborting running script does NOT work when you run console app from Visual Studio 2010. + // Run executable directly from bin folder. 
/// <summary>Text styles a console adaptor can be switched to.</summary>
public enum ConsoleTextStyle
{
    Normal,
    Error,
}

/// <summary>
/// Default <c>IConsoleAdaptor</c> implementation that forwards every call to <see cref="System.Console"/>.
/// </summary>
/// <remarks>
/// The constructor subscribes to the static <see cref="Console.CancelKeyPress"/> event;
/// no matching unsubscribe exists in this class.
/// </remarks>
public class ConsoleAdapter : IConsoleAdaptor
{
    /// <summary>Creates the adaptor and starts listening for Ctrl-C.</summary>
    public ConsoleAdapter()
    {
        Console.CancelKeyPress += OnCancelKeyPress;
    }

    /// <summary>
    /// True after Ctrl-C was pressed, or after <see cref="ReadLine"/> returned null.
    /// Each <see cref="ReadLine"/> call overwrites the flag.
    /// </summary>
    public bool Canceled { get; set; }

    /// <summary>Writes <paramref name="text"/> to standard output without a line terminator.</summary>
    public void Write(string text) => Console.Write(text);

    /// <summary>Writes <paramref name="text"/> to standard output followed by a line terminator.</summary>
    public void WriteLine(string text) => Console.WriteLine(text);

    /// <summary>
    /// Switches the console foreground colour: white for <see cref="ConsoleTextStyle.Normal"/>,
    /// red for <see cref="ConsoleTextStyle.Error"/>. Any other value leaves the colour untouched.
    /// </summary>
    public void SetTextStyle(ConsoleTextStyle style)
    {
        if (style == ConsoleTextStyle.Normal)
        {
            Console.ForegroundColor = ConsoleColor.White;
        }
        else if (style == ConsoleTextStyle.Error)
        {
            Console.ForegroundColor = ConsoleColor.Red;
        }
    }

    /// <summary>Reads the next character from standard input.</summary>
    public int Read() => Console.Read();

    /// <summary>
    /// Reads one line from standard input and records in <see cref="Canceled"/> whether it was null.
    /// </summary>
    /// <returns>The line read, or null.</returns>
    public string ReadLine()
    {
        var line = Console.ReadLine();
        // Windows console method ReadLine returns null if Ctrl-C was pressed.
        Canceled = line == null;
        return line;
    }

    /// <summary>Sets the console window title.</summary>
    public void SetTitle(string title) => Console.Title = title;

    // Ctrl-C handler: flag the cancellation and keep the process alive so the caller can decide what to do.
    private void OnCancelKeyPress(object sender, ConsoleCancelEventArgs e)
    {
        Canceled = true;
        e.Cancel = true; //do not kill the app yet
    }
}
/// <summary>Execution states reported through <see cref="ScriptApp.Status"/>.</summary>
public enum AppStatus
{
    Ready,
    Evaluating,
    WaitingMoreInput, //command line only
    SyntaxError,
    RuntimeError,
    Crash, //interpreter crash
    Aborted
}

/// <summary>Represents a running instance of a script application.</summary>
public sealed class ScriptApp
{
    public readonly LanguageData Language;
    public readonly LanguageRuntime Runtime;
    public Parser Parser { get; private set; }

    public AppDataMap DataMap;

    public Scope[] StaticScopes;
    public Scope MainScope;
    public IDictionary Globals { get; private set; }

    // NOTE(review): generic type arguments look stripped from this listing ("new List()" has no
    // non-generic form) - confirm the declared element types against the real file.
    private IList ImportedAssemblies = new List();

    public StringBuilder OutputBuffer = new StringBuilder();

    // Guards OutputBuffer and the ConsoleWrite notification in the output-writing methods.
    private readonly object _lockObject = new object();

    // Current mode/status variables
    public AppStatus Status;

    // Not assigned anywhere in this class.
    public long EvaluationTime;

    // Last exception recorded by the evaluation methods (a crash or a runtime error).
    public Exception LastException;

    // When true (default), syntax and runtime errors are thrown to the caller as ScriptException
    // in addition to being recorded in Status/LastException.
    public bool RethrowExceptions = true;

    public ParseTree LastScript { get; private set; } //the root node of the last executed script

    #region Constructors

    /// <summary>Creates an app for <paramref name="language"/>, asking its grammar for the runtime.</summary>
    /// <exception cref="Exception">The language grammar is not an <c>InterpretedLanguageGrammar</c>.</exception>
    public ScriptApp(LanguageData language)
    {
        Language = language;
        var grammar = language.Grammar as InterpretedLanguageGrammar;
        // Fail with a descriptive message instead of a NullReferenceException on the next line.
        Util.Check(grammar != null, "ScriptApp requires a grammar derived from InterpretedLanguageGrammar.");
        Runtime = grammar.CreateRuntime(language);
        DataMap = new AppDataMap(Language.Grammar.CaseSensitive);
        Init();
    }

    /// <summary>Creates an app around an existing runtime, taking the language from it.</summary>
    public ScriptApp(LanguageRuntime runtime)
    {
        Runtime = runtime;
        Language = Runtime.Language;
        DataMap = new AppDataMap(Language.Grammar.CaseSensitive);
        Init();
    }

    /// <summary>Creates an app bound to an existing data map.</summary>
    // NOTE(review): this overload never assigns Language or Runtime, yet Init() passes Language to
    // the Parser constructor - confirm whether they should be derived from the data map.
    public ScriptApp(AppDataMap dataMap)
    {
        DataMap = dataMap;
        Init();
    }

    // Creates the parser and the main (static) scope, and exposes that scope as Globals.
    [SecuritySafeCritical]
    private void Init()
    {
        Parser = new Parser(Language);
        //Create static scopes
        MainScope = new Scope(DataMap.MainModule.ScopeInfo, null, null, null);
        StaticScopes = new Scope[DataMap.StaticScopeInfos.Count];
        StaticScopes[0] = MainScope;
        Globals = MainScope.AsDictionary();
    }

    #endregion

    /// <summary>Returns the parser messages attached to the parser's current parse tree.</summary>
    public LogMessageList GetParserMessages()
    {
        return Parser.Context.CurrentParseTree.ParserMessages;
    }

    /// <summary>Gets or sets the parse mode of the underlying parser context.</summary>
    public ParseMode ParserMode
    {
        get { return Parser.Context.Mode; }
        set { Parser.Context.Mode = value; }
    }

    #region Evaluation

    /// <summary>Parses and evaluates <paramref name="script"/>.</summary>
    /// <returns>
    /// The evaluation result; null on syntax errors (when not rethrowing), when more input is
    /// expected in command-line mode, or after an unexpected crash.
    /// </returns>
    /// <exception cref="ScriptException">
    /// Syntax or runtime error while <see cref="RethrowExceptions"/> is true.
    /// </exception>
    public object Evaluate(string script)
    {
        try
        {
            var parsedScript = Parser.Parse(script);
            if (parsedScript.HasErrors())
            {
                Status = AppStatus.SyntaxError;
                if (RethrowExceptions)
                    throw new ScriptException("Syntax errors found.");
                return null;
            }

            if (ParserMode == ParseMode.CommandLine && Parser.Context.Status == ParserStatus.AcceptedPartial)
            {
                Status = AppStatus.WaitingMoreInput;
                return null;
            }
            LastScript = parsedScript;
            var result = EvaluateParsedScript();
            return result;
        }
        catch (ScriptException)
        {
            // Script-level errors have already been recorded; let them reach the caller unchanged.
            throw;
        }
        catch (Exception ex)
        {
            // Anything else is an interpreter failure: record it and report it through Status.
            LastException = ex;
            Status = AppStatus.Crash;
            return null;
        }
    }

    // Irony interpreter requires that once a script is executed in a ScriptApp, it is bound to AppDataMap object,
    // and all later script executions should be performed only in the context of the same app (or at least by an App with the same DataMap).
    // The reason is because the first execution sets up a data-binding fields, like slots, scopes, etc, which are bound to ScopeInfo objects,
    // which in turn is part of DataMap.
    /// <summary>Evaluates an already parsed script.</summary>
    /// <exception cref="Exception">
    /// The tree has no AST root, the root is not an <c>AstNode</c>, or the tree was already
    /// evaluated under a different program root.
    /// </exception>
    public object Evaluate(ParseTree parsedScript)
    {
        var astNode = parsedScript.Root.AstNode;
        Util.Check(astNode != null, "Root AST node is null, cannot evaluate script. Create AST tree first.");
        var root = astNode as AstNode;
        // Report the type of the node that is actually there. Asking the failed cast result for its
        // type would throw a NullReferenceException before Check could produce this message.
        Util.Check(root != null,
            "Root AST node {0} is not a subclass of Irony.Interpreter.AstNode. ScriptApp cannot evaluate this script.", astNode.GetType());
        Util.Check(root.Parent == null || root.Parent == DataMap.ProgramRoot,
            "Cannot evaluate parsed script. It had been already evaluated in a different application.");
        LastScript = parsedScript;
        return EvaluateParsedScript();
    }

    /// <summary>Evaluates the previously parsed/evaluated script again.</summary>
    /// <exception cref="Exception">No script has been parsed or evaluated yet.</exception>
    public object Evaluate()
    {
        Util.Check(LastScript != null, "No previously parsed/evaluated script.");
        return EvaluateParsedScript();
    }

    //Actual implementation
    private object EvaluateParsedScript()
    {
        LastScript.Tag = DataMap;
        var root = LastScript.Root.AstNode as AstNode;
        root.DependentScopeInfo = MainScope.Info;

        Status = AppStatus.Evaluating;
        ScriptThread thread = null;
        try
        {
            thread = new ScriptThread(this);
            var result = root.Evaluate(thread);
            if (result != null)
                thread.App.WriteLine(result.ToString());
            Status = AppStatus.Ready;
            return result;
        }
        catch (ScriptException se)
        {
            Status = AppStatus.RuntimeError;
            // The thread or its current node may still be unset when the failure happens early;
            // dereferencing them blindly would replace the real error with a NullReferenceException.
            var node = thread == null ? null : thread.CurrentNode;
            if (node != null)
                se.Location = node.Location;
            if (thread != null)
                se.ScriptStackTrace = thread.GetStackTrace();
            LastException = se;
            if (RethrowExceptions)
                throw;
            return null;
        }
        catch (Exception ex)
        {
            Status = AppStatus.RuntimeError;
            // Same guards as above; fall back to an empty location / stack trace.
            var node = thread == null ? null : thread.CurrentNode;
            var location = node == null ? new SourceLocation() : node.Location;
            var stack = thread == null ? new ScriptStackTrace() : thread.GetStackTrace();
            var se = new ScriptException(ex.Message, ex, location, stack);
            LastException = se;
            if (RethrowExceptions)
                throw se;
            return null;
        }//catch
    }

    #endregion

    #region Output writing

    #region ConsoleWrite event

    /// <summary>Raised for every piece of text written through <see cref="Write"/> or <see cref="WriteLine"/>.</summary>
    public event EventHandler ConsoleWrite;

    private void OnConsoleWrite(string text)
    {
        if (ConsoleWrite != null)
        {
            ConsoleWriteEventArgs args = new ConsoleWriteEventArgs(text);
            ConsoleWrite(this, args);
        }
    }

    #endregion

    /// <summary>Raises <see cref="ConsoleWrite"/> and appends <paramref name="text"/> to the output buffer.</summary>
    public void Write(string text)
    {
        lock (_lockObject)
        {
            OnConsoleWrite(text);
            OutputBuffer.Append(text);
        }
    }

    /// <summary>Same as <see cref="Write"/>, with a line terminator appended.</summary>
    public void WriteLine(string text)
    {
        lock (_lockObject)
        {
            OnConsoleWrite(text + Environment.NewLine);
            OutputBuffer.AppendLine(text);
        }
    }

    /// <summary>Discards everything collected in the output buffer.</summary>
    public void ClearOutputBuffer()
    {
        lock (_lockObject)
        {
            OutputBuffer.Clear();
        }
    }

    /// <summary>Returns the text collected in the output buffer so far.</summary>
    public string GetOutput()
    {
        lock (_lockObject)
        {
            return OutputBuffer.ToString();
        }
    }

    #endregion
}//class
/// <summary>
/// Converts an arbitrary exception into a <c>ScriptException</c> and throws it.
/// A <c>ScriptException</c> passed in is re-raised as is.
/// </summary>
/// <param name="exception">The exception caught by the caller.</param>
/// <returns>Never returns normally; every path throws.</returns>
/// <exception cref="ScriptException">Always.</exception>
public object HandleError(Exception exception)
{
    // Re-raise script errors untouched. ExceptionDispatchInfo keeps the stack trace recorded at the
    // original throw site, which a plain "throw exception;" would overwrite with this frame.
    if (exception is ScriptException)
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(exception).Throw();

    var stack = GetStackTrace();
    // CurrentNode may still be unset; fall back to an empty location, as GetCurrentLocation does.
    var anchor = CurrentNode == null ? new SourceLocation() : CurrentNode.ErrorAnchor;
    throw new ScriptException(exception.Message, exception, anchor, stack);
}
/// <summary>Small formatting and precondition helpers used by the interpreter.</summary>
public static class Util
{
    /// <summary>
    /// Formats <paramref name="template"/> with <paramref name="args"/> without ever throwing on a
    /// malformed template.
    /// </summary>
    /// <param name="template">Composite format string.</param>
    /// <param name="args">Format arguments; when null or empty the template is returned untouched.</param>
    /// <returns>
    /// The formatted text, or the template followed by a note describing the formatting failure
    /// and the arguments.
    /// </returns>
    public static string SafeFormat(this string template, params object[] args)
    {
        var hasArguments = args != null && args.Length > 0;
        if (!hasArguments)
        {
            return template;
        }

        try
        {
            return string.Format(template, args);
        }
        catch (Exception formatError)
        {
            // Keep the raw template and describe the failure rather than losing the message.
            return template + "(message formatting failed: " + formatError.Message + " Args: " + string.Join(",", args) + ")";
        }
    }//method

    /// <summary>Throws when <paramref name="condition"/> is false.</summary>
    /// <param name="condition">The condition that must hold.</param>
    /// <param name="messageTemplate">Message template, formatted through <see cref="SafeFormat"/>.</param>
    /// <param name="args">Arguments for the template.</param>
    /// <exception cref="Exception"><paramref name="condition"/> is false.</exception>
    public static void Check(bool condition, string messageTemplate, params object[] args)
    {
        if (!condition)
        {
            throw new Exception(messageTemplate.SafeFormat(args));
        }
    }
}//class
/// <summary>
/// Convenience facade that wires a grammar, its language data, a parser, a runtime and a
/// <see cref="ScriptApp"/> together and forwards evaluation calls to the app.
/// </summary>
public class ExpressionEvaluator
{
    public ExpressionEvaluatorGrammar Grammar { get; private set; }
    public Parser Parser { get; private set; }
    public LanguageData Language { get; private set; }
    public LanguageRuntime Runtime { get; private set; }
    public ScriptApp App { get; private set; }

    /// <summary>Global variables of the underlying app.</summary>
    public IDictionary Globals => App.Globals;

    /// <summary>Creates an evaluator over a fresh <see cref="ExpressionEvaluatorGrammar"/>.</summary>
    public ExpressionEvaluator()
        : this(new ExpressionEvaluatorGrammar())
    {
    }

    /// <summary>Creates an evaluator over the supplied grammar.</summary>
    /// <param name="grammar">Grammar used to build the language data and the runtime.</param>
    public ExpressionEvaluator(ExpressionEvaluatorGrammar grammar)
    {
        Grammar = grammar;

        var languageData = new LanguageData(grammar);
        Language = languageData;
        Parser = new Parser(languageData);

        var runtime = grammar.CreateRuntime(languageData);
        Runtime = runtime;
        App = new ScriptApp(runtime);
    }

    /// <summary>Parses and evaluates <paramref name="script"/> in the underlying app.</summary>
    public object Evaluate(string script) => App.Evaluate(script);

    /// <summary>Evaluates an already parsed script in the underlying app.</summary>
    public object Evaluate(ParseTree parsedScript) => App.Evaluate(parsedScript);

    /// <summary>Evaluates again the previously parsed/evaluated script.</summary>
    public object Evaluate() => App.Evaluate();

    /// <summary>Clears the app's output buffer.</summary>
    public void ClearOutput() => App.ClearOutputBuffer();

    /// <summary>Returns the text collected in the app's output buffer.</summary>
    public string GetOutput() => App.GetOutput();
}//class
Case-insensitive. Supports big integers, float data types, variables, assignments, +arithmetic operations, augmented assignments (+=, -=), inc/dec (++,--), strings with embedded expressions; +bool operations &,&&, |, ||; ternary '?:' operator."; + // 1. Terminals + var number = new NumberLiteral("number"); + //Let's allow big integers (with unlimited number of digits): + number.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }; + var identifier = new IdentifierTerminal("identifier"); + var comment = new CommentTerminal("comment", "#", "\n", "\r"); + //comment must be added to NonGrammarTerminals list; it is not used directly in grammar rules, + // so we add it to this list to let Scanner know that it is also a valid terminal. + NonGrammarTerminals.Add(comment); + var comma = ToTerm(","); + + //String literal with embedded expressions ------------------------------------------------------------------ + var stringLit = new StringLiteral("string", "\"", StringOptions.AllowsAllEscapes | StringOptions.IsTemplate); + stringLit.AddStartEnd("'", StringOptions.AllowsAllEscapes | StringOptions.IsTemplate); + stringLit.AstConfig.NodeType = typeof(StringTemplateNode); + var Expr = new NonTerminal("Expr"); //declare it here to use in template definition + var templateSettings = new StringTemplateSettings(); //by default set to Ruby-style settings + templateSettings.ExpressionRoot = Expr; //this defines how to evaluate expressions inside template + SnippetRoots.Add(Expr); + stringLit.AstConfig.Data = templateSettings; + //-------------------------------------------------------------------------------------------------------- + + // 2. 
Non-terminals + var Term = new NonTerminal("Term"); + var BinExpr = new NonTerminal("BinExpr", typeof(BinaryOperationNode)); + var ParExpr = new NonTerminal("ParExpr"); + var UnExpr = new NonTerminal("UnExpr", typeof(UnaryOperationNode)); + var TernaryIfExpr = new NonTerminal("TernaryIf", typeof(IfNode)); + var ArgList = new NonTerminal("ArgList", typeof(ExpressionListNode)); + var FunctionCall = new NonTerminal("FunctionCall", typeof(FunctionCallNode)); + var MemberAccess = new NonTerminal("MemberAccess", typeof(MemberAccessNode)); + var IndexedAccess = new NonTerminal("IndexedAccess", typeof(IndexedAccessNode)); + var ObjectRef = new NonTerminal("ObjectRef"); // foo, foo.bar or f['bar'] + var UnOp = new NonTerminal("UnOp"); + var BinOp = new NonTerminal("BinOp", "operator"); + var PrefixIncDec = new NonTerminal("PrefixIncDec", typeof(IncDecNode)); + var PostfixIncDec = new NonTerminal("PostfixIncDec", typeof(IncDecNode)); + var IncDecOp = new NonTerminal("IncDecOp"); + var AssignmentStmt = new NonTerminal("AssignmentStmt", typeof(AssignmentNode)); + var AssignmentOp = new NonTerminal("AssignmentOp", "assignment operator"); + var Statement = new NonTerminal("Statement"); + var Program = new NonTerminal("Program", typeof(StatementListNode)); + + // 3. BNF rules + Expr.Rule = Term | UnExpr | BinExpr | PrefixIncDec | PostfixIncDec | TernaryIfExpr; + Term.Rule = number | ParExpr | stringLit | FunctionCall | identifier | MemberAccess | IndexedAccess; + ParExpr.Rule = "(" + Expr + ")"; + UnExpr.Rule = UnOp + Term + ReduceHere(); + UnOp.Rule = ToTerm("+") | "-" | "!"; + BinExpr.Rule = Expr + BinOp + Expr; + BinOp.Rule = ToTerm("+") | "-" | "*" | "/" | "**" | "==" | "<" | "<=" | ">" | ">=" | "!=" | "&&" | "||" | "&" | "|"; + PrefixIncDec.Rule = IncDecOp + identifier; + PostfixIncDec.Rule = identifier + PreferShiftHere() + IncDecOp; + IncDecOp.Rule = ToTerm("++") | "--"; + TernaryIfExpr.Rule = Expr + "?" 
+ Expr + ":" + Expr; + MemberAccess.Rule = Expr + PreferShiftHere() + "." + identifier; + AssignmentStmt.Rule = ObjectRef + AssignmentOp + Expr; + AssignmentOp.Rule = ToTerm("=") | "+=" | "-=" | "*=" | "/="; + Statement.Rule = AssignmentStmt | Expr | Empty; + ArgList.Rule = MakeStarRule(ArgList, comma, Expr); + FunctionCall.Rule = Expr + PreferShiftHere() + "(" + ArgList + ")"; + FunctionCall.NodeCaptionTemplate = "call #{0}(...)"; + ObjectRef.Rule = identifier | MemberAccess | IndexedAccess; + IndexedAccess.Rule = Expr + PreferShiftHere() + "[" + Expr + "]"; + + Program.Rule = MakePlusRule(Program, NewLine, Statement); + + Root = Program; // Set grammar root + + // 4. Operators precedence + RegisterOperators(10, "?"); + RegisterOperators(15, "&", "&&", "|", "||"); + RegisterOperators(20, "==", "<", "<=", ">", ">=", "!="); + RegisterOperators(30, "+", "-"); + RegisterOperators(40, "*", "/"); + RegisterOperators(50, Associativity.Right, "**"); + RegisterOperators(60, "!"); + // For precedence to work, we need to take care of one more thing: BinOp. + //For BinOp which is or-combination of binary operators, we need to either + // 1) mark it transient or 2) set flag TermFlags.InheritPrecedence + // We use first option, making it Transient. + + // 5. Punctuation and transient terms + MarkPunctuation("(", ")", "?", ":", "[", "]"); + RegisterBracePair("(", ")"); + RegisterBracePair("[", "]"); + MarkTransient(Term, Expr, Statement, BinOp, UnOp, IncDecOp, AssignmentOp, ParExpr, ObjectRef); + + // 7. Syntax error reporting + MarkNotReported("++", "--"); + AddToNoReportGroup("(", "++", "--"); + AddToNoReportGroup(NewLine); + AddOperatorReportGroup("operator"); + AddTermsReportGroup("assignment operator", "=", "+=", "-=", "*=", "/="); + + //8. 
/// <summary>
/// Runtime for the expression evaluator: registers the <c>print</c> and <c>format</c> built-ins,
/// the <c>iif</c> special form, and the static members of <see cref="Math"/> and <see cref="Environment"/>.
/// </summary>
public class ExpressionEvaluatorRuntime : LanguageRuntime
{
    public ExpressionEvaluatorRuntime(LanguageData language) : base(language)
    {
    }

    /// <summary>Runs the base initialisation, then registers the built-ins.</summary>
    public override void Init()
    {
        base.Init();
        //add built-in methods, special form IIF, import Math and Environment methods
        BuiltIns.AddMethod(BuiltInPrintMethod, "print");
        BuiltIns.AddMethod(BuiltInFormatMethod, "format");
        BuiltIns.AddSpecialForm(SpecialFormsLibrary.Iif, "iif", 3, 3);
        BuiltIns.ImportStaticMembers(typeof(Math));
        // NOTE(review): this makes the static members of System.Environment callable from scripts -
        // confirm that is acceptable wherever untrusted scripts can run.
        BuiltIns.ImportStaticMembers(typeof(Environment));
    }

    //Built-in methods

    // print(...): writes its arguments, separated by single spaces, as one line of app output.
    // Always returns null.
    private object BuiltInPrintMethod(ScriptThread thread, object[] args)
    {
        // Treat a missing argument array like an empty one, matching the null check in
        // BuiltInFormatMethod; previously a null array caused a NullReferenceException.
        var count = args == null ? 0 : args.Length;

        string text;
        if (count == 0)
            text = string.Empty;
        else if (count == 1)
            text = string.Empty + args[0]; //compact and safe conversion ToString()
        else
            text = string.Join(" ", args);

        thread.App.WriteLine(text);
        return null;
    }

    // format(template, ...): string.Format over the remaining arguments.
    // Returns null when called without arguments, and the template itself when it is the only argument.
    private object BuiltInFormatMethod(ScriptThread thread, object[] args)
    {
        if (args == null || args.Length == 0) return null;
        var template = args[0] as string;
        if (template == null)
            this.ThrowScriptError("Format template must be a string.");
        if (args.Length == 1) return template;
        //create formatting args array
        var formatArgs = args.Skip(1).ToArray();
        var text = string.Format(template, formatArgs);
        return text;
    }
}
#endregion + // 定义数字类型 + var number = new NumberLiteral("Number"); + number.DefaultIntTypes = new TypeCode[] { TypeCode.Int16, TypeCode.Int32, TypeCode.Int64, TypeCode.Decimal, TypeCode.Single, TypeCode.Double }; + // 默认Decimal类型 + number.DefaultFloatType = TypeCode.Decimal; + + var Identifier = new IdentifierTerminal("Identifier"); + + CommentTerminal blockComment = new CommentTerminal("block-comment", "/*", "*/"); + CommentTerminal lineComment = new CommentTerminal("line-comment", "//", + "\r", "\n", "\u2085", "\u2028", "\u2029"); + NonGrammarTerminals.Add(blockComment); + NonGrammarTerminals.Add(lineComment); + // 变量声明 + var Declaration = new NonTerminal("Declaration"); + Declaration.Rule = MakeStarRule(Declaration, Declaration); + Declaration.Rule = Identifier; + var Parameters = new NonTerminal("Parameters"); + // a, b, c + Parameters.Rule = Parameters + "," + Identifier | Identifier; + // 全局变量 + var Globals = new NonTerminal("Globals"); + Globals.Rule = "globals" + Declaration + "end"; + // 定义函数 + var Function = new NonTerminal("Function"); + + Function.Rule = "func" + Identifier + "end"; + + #region 表达式 + // 定义表达式 + var Expression = new NonTerminal("Expression"); + // 小括号表达式 + var ParenthesesExpression = new NonTerminal("ParenthesesExpression"); + ParenthesesExpression.Rule = "(" + Expression + ")"; + // 定义一元表达式 + var UnaryExpression = new NonTerminal("UnaryExpression"); + var UnarOperation = new NonTerminal("UnaryOperation", "operator"); + UnarOperation.Rule = ToTerm("+") | "-" | "++" | "--"; + // 定义二元表达式 + var BinaryExpression = new NonTerminal("BinaryExpression"); + var BinaryOperation = new NonTerminal("BinaryOperation", "operator"); + BinaryOperation.Rule = ToTerm("+") | "-" | "*" | "/"; + BinaryExpression.Rule = Expression + BinaryOperation + Expression; + + Expression.Rule = UnaryExpression | BinaryExpression | ParenthesesExpression; + #endregion + // 赋值 + var Assignment = new NonTerminal("Assignment"); + + #region 运算符优先级 + RegisterOperators(1, "+", 
"-"); + RegisterOperators(2, "-", "*"); + #endregion + + + // 大头语言主体 + var Program = new NonTerminal("Program"); + + + } + } +} diff --git a/src/Irony.SampleApp/Evaluations/BinaryEvaluation.cs b/src/Irony.SampleApp/Evaluations/BinaryEvaluation.cs new file mode 100644 index 0000000..a7a9846 --- /dev/null +++ b/src/Irony.SampleApp/Evaluations/BinaryEvaluation.cs @@ -0,0 +1,46 @@ +using System; + +namespace Sanchime.Irony.SampleApp.Evaluations +{ + internal sealed class BinaryEvaluation : Evaluation + { + private readonly Evaluation left; + private readonly Evaluation right; + + private readonly BinaryOperation oper; + + public BinaryEvaluation(Evaluation left, Evaluation right, BinaryOperation oper) + { + this.left = left; + this.right = right; + this.oper = oper; + } + + public override object Value + { + get + { + if (left.Value == null || right.Value == null) + { + throw new InvalidOperationException("Either left or right value of the binary evaluation has been evaluated to null."); + } + if (!float.TryParse(left.Value.ToString(), out float leftValue) || + !float.TryParse(right.Value.ToString(), out float rightValue)) + { + throw new InvalidOperationException("Either left or right value of the binary evaluation cannot be evaluated as a float value."); + } + + return oper switch + { + BinaryOperation.Add => leftValue + rightValue, + BinaryOperation.Sub => leftValue - rightValue, + BinaryOperation.Mul => leftValue * rightValue, + BinaryOperation.Div => leftValue / rightValue, + _ => throw new InvalidOperationException("无效的二元运算符") + }; + } + } + + public override string ToString() => $"{left?.ToString()} {oper} {right?.ToString()}"; + } +} \ No newline at end of file diff --git a/src/Irony.SampleApp/Evaluations/BinaryOperation.cs b/src/Irony.SampleApp/Evaluations/BinaryOperation.cs new file mode 100644 index 0000000..2fd37d2 --- /dev/null +++ b/src/Irony.SampleApp/Evaluations/BinaryOperation.cs @@ -0,0 +1,10 @@ +namespace Sanchime.Irony.SampleApp.Evaluations +{ + 
internal enum BinaryOperation + { + Add, + Sub, + Mul, + Div + } +} \ No newline at end of file diff --git a/src/Irony.SampleApp/Evaluations/ConstantEvaluation.cs b/src/Irony.SampleApp/Evaluations/ConstantEvaluation.cs new file mode 100644 index 0000000..dca0c0f --- /dev/null +++ b/src/Irony.SampleApp/Evaluations/ConstantEvaluation.cs @@ -0,0 +1,14 @@ +namespace Sanchime.Irony.SampleApp.Evaluations +{ + internal sealed class ConstantEvaluation : Evaluation + { + private readonly object value; + + public ConstantEvaluation(object value) + { + this.value = value; + } + + public override object Value => value; + } +} \ No newline at end of file diff --git a/src/Irony.SampleApp/Evaluations/Evaluation.cs b/src/Irony.SampleApp/Evaluations/Evaluation.cs new file mode 100644 index 0000000..0555d0f --- /dev/null +++ b/src/Irony.SampleApp/Evaluations/Evaluation.cs @@ -0,0 +1,9 @@ +namespace Sanchime.Irony.SampleApp.Evaluations +{ + internal abstract class Evaluation + { + public abstract object Value { get; } + + public override string ToString() => Value?.ToString(); + } +} \ No newline at end of file diff --git a/src/Irony.SampleApp/Evaluations/Evaluator.cs b/src/Irony.SampleApp/Evaluations/Evaluator.cs new file mode 100644 index 0000000..fc140ba --- /dev/null +++ b/src/Irony.SampleApp/Evaluations/Evaluator.cs @@ -0,0 +1,73 @@ +using Sanchime.Irony.Parsing.Data; +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Utilities; +using System; +using System.Globalization; +using System.Text; + +namespace Sanchime.Irony.SampleApp.Evaluations +{ + internal sealed class Evaluator + { + public Evaluation Evaluate(string input) + { + var language = new LanguageData(new ExpressionGrammar()); + var parser = new Parser(language); + var syntaxTree = parser.Parse(input); + + if (syntaxTree.HasErrors()) + { + throw new InvalidOperationException(BuildParsingErrorMessage(syntaxTree.ParserMessages)); + } + + return PerformEvaluate(syntaxTree.Root); + } + + private Evaluation 
PerformEvaluate(ParseTreeNode node) + { + switch (node.Term.Name) + { + case "BinaryExpression": + var leftNode = node.ChildNodes[0]; + var opNode = node.ChildNodes[1]; + var rightNode = node.ChildNodes[2]; + Evaluation left = PerformEvaluate(leftNode); + Evaluation right = PerformEvaluate(rightNode); + BinaryOperation op = BinaryOperation.Add; + switch (opNode.Term.Name) + { + case "+": + op = BinaryOperation.Add; + break; + + case "-": + op = BinaryOperation.Sub; + break; + + case "*": + op = BinaryOperation.Mul; + break; + + case "/": + op = BinaryOperation.Div; + break; + } + return new BinaryEvaluation(left, right, op); + + case "Number": + var value = Convert.ToSingle(node.Token.Text, CultureInfo.InvariantCulture.NumberFormat); + return new ConstantEvaluation(value); + } + + throw new InvalidOperationException($"Unrecognizable term {node.Term.Name}."); + } + + private static string BuildParsingErrorMessage(LogMessageList messages) + { + var sb = new StringBuilder(); + sb.AppendLine("Parsing failed with the following errors:"); + messages.ForEach(msg => sb.AppendLine($"\t{msg.Message}")); + return sb.ToString(); + } + } +} \ No newline at end of file diff --git a/src/Irony.SampleApp/ExpressionGrammar.cs b/src/Irony.SampleApp/ExpressionGrammar.cs new file mode 100644 index 0000000..0b921cb --- /dev/null +++ b/src/Irony.SampleApp/ExpressionGrammar.cs @@ -0,0 +1,53 @@ +using Sanchime.Irony.Interpreter.Ast.Expressions; +using Sanchime.Irony.Interpreter.Ast.Statements; +using Sanchime.Irony.Parsing.Grammars; +using Sanchime.Irony.Parsing.Terminals; +using System; + +namespace Sanchime.Irony.SampleApp +{ + /// + /// Represents the grammar of a custom expression. + /// + /// + [Language("Expression Grammar", "1.0", "A simple arithmetic expression grammar.")] + public class ExpressionGrammar : Grammar + { + /// + /// Initializes a new instance of the class. 
+ /// + public ExpressionGrammar() : base(false) + { + var number = new NumberLiteral("Number"); + number.DefaultIntTypes = new TypeCode[] { TypeCode.Int16, TypeCode.Int32, TypeCode.Int64 }; + number.DefaultFloatType = TypeCode.Single; + + var identifier = new IdentifierTerminal("Identifier"); + var comma = ToTerm(","); + + var BinOp = new NonTerminal("BinaryOperator", "operator"); + var ParExpr = new NonTerminal("ParenthesisExpression"); + var BinExpr = new NonTerminal("BinaryExpression", typeof(BinaryOperationNode)); + var Expr = new NonTerminal("Expression"); + var Term = new NonTerminal("Term"); + + var Program = new NonTerminal("Program", typeof(StatementListNode)); + + Expr.Rule = Term | ParExpr | BinExpr; + Term.Rule = number | identifier; + + ParExpr.Rule = "(" + Expr + ")"; + BinExpr.Rule = Expr + BinOp + Expr; + BinOp.Rule = ToTerm("+") | "-" | "*" | "/"; + + RegisterOperators(10, "+", "-"); + RegisterOperators(20, "*", "/"); + + MarkPunctuation("(", ")"); + RegisterBracePair("(", ")"); + MarkTransient(Expr, Term, BinOp, ParExpr); + + Root = Expr; + } + } +} \ No newline at end of file diff --git a/src/Irony.SampleApp/Program.cs b/src/Irony.SampleApp/Program.cs new file mode 100644 index 0000000..e4473e1 --- /dev/null +++ b/src/Irony.SampleApp/Program.cs @@ -0,0 +1,31 @@ +using Sanchime.Irony.Parsing.Data; +using Sanchime.Irony.Parsing.Parsers; +using System; + +namespace Sanchime.Irony.SampleApp +{ + internal class Program + { + private static void Main(string[] args) + { + string sql = "SELECT Id, Name, Age, Gender FROM Student WHERE Age > 10 ORDER BY CreateDate"; + Console.WriteLine("SQL:"); + Console.WriteLine(sql); + SqlParser(sql); + } + + private static void SqlParser(string sql) + { + var language = new LanguageData(new SqlGrammar()); + var parser = new Parser(language); + var syntaxTree = parser.Parse(sql); + + + } + + private static void Tree(ParseTree root) + { + + } + } +} \ No newline at end of file diff --git a/src/Irony.SampleApp/SQL/SQL 
89.txt b/src/Irony.SampleApp/SQL/SQL 89.txt new file mode 100644 index 0000000..7c269d7 --- /dev/null +++ b/src/Irony.SampleApp/SQL/SQL 89.txt @@ -0,0 +1,260 @@ +! ----------------------------------------------------------------------------------- +! SQL '89 +! +! SQL (Structured Query Language) +! SQL (结构化查询语言) +! +! The SQL programming language was developed as a uniform means of modifying and +! querying relational databases. By using a single abstract language to interact +! with the database, programs can be written that are independent of the vendor and +! format of the database itself. Variations are used by Oracle, Microsoft and most +! other developers +! +! In 1992, a new version of SQL was released but has yet to be implemented by any major +! developer. The reason for this lies in the sheer complexity of the grammar. SQL 92 +! contains over 300 rules and a myriad of new features. For instance, in SQL 92, the +! developer can create types using COBOL syntax rather than the normal data types of +! SQL 89. This reason combined with the fact that SQL 89 is a time-tested and +! ample tool maintains it as the standard of the database industry. +! +! Update: +! 02/17/2005 +! Added "NULL" to the rule, I also added more comments to the grammar +! +! Note: This is an ad hoc version of the language. If there are any flaws, please +! visit www.devincook.com/goldparser +! ----------------------------------------------------------------------------------- + +"Name" = 'SQL 89' +"Version" = '1989' +"About" = 'This is the ANSI 89 version of SQL. Variations are used by' + | 'Oracle, Microsoft and most other database developers' + +"Start Symbol" = + +! ============================================================================= +! 注释 +! ============================================================================= + +Comment Start = '/*' +Comment End = '*/' +Comment Line = '--' + +! ============================================================================= +! 终结符 +! 
============================================================================= + +{String Ch 1} = {Printable} - ["] +{String Ch 2} = {Printable} - [''] +{Id Ch Standard} = {Alphanumeric} + [_] +{Id Ch Extended} = {Printable} - ['['] - [']'] + +StringLiteral = '"'{String Ch 1}*'"' | ''{String Ch 2}*'' +IntegerLiteral = {Digit}+ +RealLiteral = {Digit}+'.'{Digit}+ + +!----- SQL标识符 + +Id = ({Letter}{Id Ch Standard}* | '['{Id Ch Extended}+']') ('.'({Letter}{Id Ch Standard}* | '['{Id Ch Extended}+']'))? + +! ============================================================================= +! 语法规则 +! ============================================================================= + + ::= + | + | + | + | + | + | INSERT INTO Id '(' ')' VALUES '(' ')' + + ::= UPDATE Id SET + + ::= Id '=' ',' + | Id '=' + + ::= DELETE FROM Id + +! ============================================================================= +! 查询语句 +! ============================================================================= + + ')' + | '(' ')' + + ::= ',' + | + + ::= ',' + | + + ::= Id + | Id Id diff --git a/src/Irony.SampleApp/SQL/SQL89.cs b/src/Irony.SampleApp/SQL/SQL89.cs new file mode 100644 index 0000000..3924b3c --- /dev/null +++ b/src/Irony.SampleApp/SQL/SQL89.cs @@ -0,0 +1,243 @@ +using Sanchime.Irony.Parsing.Grammars; +using Sanchime.Irony.Parsing.Terminals; + +namespace Sanchime.Irony.SampleApp +{ + [Language("SQL", "89", "SQL 89 语法")] + public class SqlGrammar : Grammar + { + public SqlGrammar() : base(false) + { //SQL is case insensitive + //Terminals + var comment = new CommentTerminal("comment", "/*", "*/"); + var lineComment = new CommentTerminal("line_comment", "--", "\n", "\r\n"); + NonGrammarTerminals.Add(comment); + NonGrammarTerminals.Add(lineComment); + var number = new NumberLiteral("number"); + var string_literal = new StringLiteral("string", "'", StringOptions.AllowsDoubledQuote); + var Id_simple = TerminalFactory.CreateSqlExtIdentifier(this, "id_simple"); //covers normal identifiers (abc) 
and quoted id's ([abc d], "abc d") + var comma = ToTerm(","); + var dot = ToTerm("."); + var CREATE = ToTerm("CREATE"); + var NULL = ToTerm("NULL"); + var NOT = ToTerm("NOT"); + var UNIQUE = ToTerm("UNIQUE"); + var WITH = ToTerm("WITH"); + var TABLE = ToTerm("TABLE"); + var ALTER = ToTerm("ALTER"); + var ADD = ToTerm("ADD"); + var COLUMN = ToTerm("COLUMN"); + var DROP = ToTerm("DROP"); + var CONSTRAINT = ToTerm("CONSTRAINT"); + var INDEX = ToTerm("INDEX"); + var ON = ToTerm("ON"); + var KEY = ToTerm("KEY"); + var PRIMARY = ToTerm("PRIMARY"); + var INSERT = ToTerm("INSERT"); + var INTO = ToTerm("INTO"); + var UPDATE = ToTerm("UPDATE"); + var SET = ToTerm("SET"); + var VALUES = ToTerm("VALUES"); + var DELETE = ToTerm("DELETE"); + var SELECT = ToTerm("SELECT"); + var FROM = ToTerm("FROM"); + var AS = ToTerm("AS"); + var COUNT = ToTerm("COUNT"); + var JOIN = ToTerm("JOIN"); + var BY = ToTerm("BY"); + + // 非终结符 + var Id = new NonTerminal("Id"); + var stmt = new NonTerminal("stmt"); + var createTableStmt = new NonTerminal("createTableStmt"); + var createIndexStmt = new NonTerminal("createIndexStmt"); + var alterStmt = new NonTerminal("alterStmt"); + var dropTableStmt = new NonTerminal("dropTableStmt"); + var dropIndexStmt = new NonTerminal("dropIndexStmt"); + var selectStmt = new NonTerminal("selectStmt"); + var insertStmt = new NonTerminal("insertStmt"); + var updateStmt = new NonTerminal("updateStmt"); + var deleteStmt = new NonTerminal("deleteStmt"); + var fieldDef = new NonTerminal("fieldDef"); + var fieldDefList = new NonTerminal("fieldDefList"); + var nullSpecOpt = new NonTerminal("nullSpecOpt"); + var typeName = new NonTerminal("typeName"); + var typeSpec = new NonTerminal("typeSpec"); + var typeParamsOpt = new NonTerminal("typeParams"); + var constraintDef = new NonTerminal("constraintDef"); + var constraintListOpt = new NonTerminal("constraintListOpt"); + var constraintTypeOpt = new NonTerminal("constraintTypeOpt"); + var idlist = new NonTerminal("idlist"); + 
var idlistPar = new NonTerminal("idlistPar"); + var uniqueOpt = new NonTerminal("uniqueOpt"); + var orderList = new NonTerminal("orderList"); + var orderMember = new NonTerminal("orderMember"); + var orderDirOpt = new NonTerminal("orderDirOpt"); + var withClauseOpt = new NonTerminal("withClauseOpt"); + var alterCmd = new NonTerminal("alterCmd"); + var insertData = new NonTerminal("insertData"); + var intoOpt = new NonTerminal("intoOpt"); + var assignList = new NonTerminal("assignList"); + var whereClauseOpt = new NonTerminal("whereClauseOpt"); + var assignment = new NonTerminal("assignment"); + var expression = new NonTerminal("expression"); + var exprList = new NonTerminal("exprList"); + var selRestrOpt = new NonTerminal("selRestrOpt"); + var selList = new NonTerminal("selList"); + var intoClauseOpt = new NonTerminal("intoClauseOpt"); + var fromClauseOpt = new NonTerminal("fromClauseOpt"); + var groupClauseOpt = new NonTerminal("groupClauseOpt"); + var havingClauseOpt = new NonTerminal("havingClauseOpt"); + var orderClauseOpt = new NonTerminal("orderClauseOpt"); + var columnItemList = new NonTerminal("columnItemList"); + var columnItem = new NonTerminal("columnItem"); + var columnSource = new NonTerminal("columnSource"); + var asOpt = new NonTerminal("asOpt"); + var aliasOpt = new NonTerminal("aliasOpt"); + var aggregate = new NonTerminal("aggregate"); + var aggregateArg = new NonTerminal("aggregateArg"); + var aggregateName = new NonTerminal("aggregateName"); + var tuple = new NonTerminal("tuple"); + var joinChainOpt = new NonTerminal("joinChainOpt"); + var joinKindOpt = new NonTerminal("joinKindOpt"); + var term = new NonTerminal("term"); + var unExpr = new NonTerminal("unExpr"); + var unOp = new NonTerminal("unOp"); + var binExpr = new NonTerminal("binExpr"); + var binOp = new NonTerminal("binOp"); + var betweenExpr = new NonTerminal("betweenExpr"); + var inExpr = new NonTerminal("inExpr"); + var parSelectStmt = new NonTerminal("parSelectStmt"); + var notOpt = 
new NonTerminal("notOpt"); + var funCall = new NonTerminal("funCall"); + var stmtLine = new NonTerminal("stmtLine"); + var semiOpt = new NonTerminal("semiOpt"); + var stmtList = new NonTerminal("stmtList"); + var funArgs = new NonTerminal("funArgs"); + var inStmt = new NonTerminal("inStmt"); + + //BNF Rules + this.Root = stmtList; + stmtLine.Rule = stmt + semiOpt; + semiOpt.Rule = Empty | ";"; + stmtList.Rule = MakePlusRule(stmtList, stmtLine); + + // 标识符语法规则 + Id.Rule = MakePlusRule(Id, dot, Id_simple); + + stmt.Rule = createTableStmt | createIndexStmt | alterStmt + | dropTableStmt | dropIndexStmt + | selectStmt | insertStmt | updateStmt | deleteStmt + | "GO"; + // Create Table + createTableStmt.Rule = CREATE + TABLE + Id + "(" + fieldDefList + ")" + constraintListOpt; + fieldDefList.Rule = MakePlusRule(fieldDefList, comma, fieldDef); + fieldDef.Rule = Id + typeName + typeParamsOpt + nullSpecOpt; + nullSpecOpt.Rule = NULL | NOT + NULL | Empty; + typeName.Rule = ToTerm("BIT") | "DATE" | "TIME" | "TIMESTAMP" | "DECIMAL" | "REAL" | "FLOAT" | "SMALLINT" | "INTEGER" + | "INTERVAL" | "CHARACTER" + // MS SQL types: + | "DATETIME" | "INT" | "DOUBLE" | "CHAR" | "NCHAR" | "VARCHAR" | "NVARCHAR" + | "IMAGE" | "TEXT" | "NTEXT"; + typeParamsOpt.Rule = "(" + number + ")" | "(" + number + comma + number + ")" | Empty; + constraintDef.Rule = CONSTRAINT + Id + constraintTypeOpt; + constraintListOpt.Rule = MakeStarRule(constraintListOpt, constraintDef); + constraintTypeOpt.Rule = PRIMARY + KEY + idlistPar | UNIQUE + idlistPar | NOT + NULL + idlistPar + | "Foreign" + KEY + idlistPar + "References" + Id + idlistPar; + idlistPar.Rule = "(" + idlist + ")"; + idlist.Rule = MakePlusRule(idlist, comma, Id); + + //Create Index + createIndexStmt.Rule = CREATE + uniqueOpt + INDEX + Id + ON + Id + orderList + withClauseOpt; + uniqueOpt.Rule = Empty | UNIQUE; + orderList.Rule = MakePlusRule(orderList, comma, orderMember); + orderMember.Rule = Id + orderDirOpt; + orderDirOpt.Rule = Empty | 
"ASC" | "DESC"; + withClauseOpt.Rule = Empty | WITH + PRIMARY | WITH + "Disallow" + NULL | WITH + "Ignore" + NULL; + + //Alter + alterStmt.Rule = ALTER + TABLE + Id + alterCmd; + alterCmd.Rule = ADD + COLUMN + fieldDefList + constraintListOpt + | ADD + constraintDef + | DROP + COLUMN + Id + | DROP + CONSTRAINT + Id; + + //Drop stmts + dropTableStmt.Rule = DROP + TABLE + Id; + dropIndexStmt.Rule = DROP + INDEX + Id + ON + Id; + + // 插入语法规则 + insertStmt.Rule = INSERT + intoOpt + Id + idlistPar + insertData; + insertData.Rule = selectStmt | VALUES + "(" + exprList + ")"; + intoOpt.Rule = Empty | INTO; //Into is optional in MSSQL + + // 更新语法规则 + updateStmt.Rule = UPDATE + Id + SET + assignList + whereClauseOpt; + assignList.Rule = MakePlusRule(assignList, comma, assignment); + assignment.Rule = Id + "=" + expression; + + // 删除语法规则 + deleteStmt.Rule = DELETE + FROM + Id + whereClauseOpt; + + // 查询语法规则 + selectStmt.Rule = SELECT + selRestrOpt + selList + intoClauseOpt + fromClauseOpt + whereClauseOpt + + groupClauseOpt + havingClauseOpt + orderClauseOpt; + selRestrOpt.Rule = Empty | "ALL" | "DISTINCT"; + selList.Rule = columnItemList | "*"; + columnItemList.Rule = MakePlusRule(columnItemList, comma, columnItem); + columnItem.Rule = columnSource + aliasOpt; + aliasOpt.Rule = Empty | asOpt + Id; + asOpt.Rule = Empty | AS; + columnSource.Rule = aggregate | Id; + aggregate.Rule = aggregateName + "(" + aggregateArg + ")"; + aggregateArg.Rule = expression | "*"; + aggregateName.Rule = COUNT | "Avg" | "Min" | "Max" | "StDev" | "StDevP" | "Sum" | "Var" | "VarP"; + intoClauseOpt.Rule = Empty | INTO + Id; + fromClauseOpt.Rule = Empty | FROM + idlist + joinChainOpt; + joinChainOpt.Rule = Empty | joinKindOpt + JOIN + idlist + ON + Id + "=" + Id; + joinKindOpt.Rule = Empty | "INNER" | "LEFT" | "RIGHT"; + whereClauseOpt.Rule = Empty | "WHERE" + expression; + groupClauseOpt.Rule = Empty | "GROUP" + BY + idlist; + havingClauseOpt.Rule = Empty | "HAVING" + expression; + 
orderClauseOpt.Rule = Empty | "ORDER" + BY + orderList; + + //Expression + exprList.Rule = MakePlusRule(exprList, comma, expression); + expression.Rule = term | unExpr | binExpr;// | betweenExpr; //-- BETWEEN doesn't work - yet; brings a few parsing conflicts + term.Rule = Id | string_literal | number | funCall | tuple | parSelectStmt;// | inStmt; + tuple.Rule = "(" + exprList + ")"; + parSelectStmt.Rule = "(" + selectStmt + ")"; + unExpr.Rule = unOp + term; + unOp.Rule = NOT | "+" | "-" | "~"; + binExpr.Rule = expression + binOp + expression; + binOp.Rule = ToTerm("+") | "-" | "*" | "/" | "%" //arithmetic + | "&" | "|" | "^" //bit + | "=" | ">" | "<" | ">=" | "<=" | "<>" | "!=" | "!<" | "!>" + | "AND" | "OR" | "LIKE" | NOT + "LIKE" | "IN" | NOT + "IN"; + betweenExpr.Rule = expression + notOpt + "BETWEEN" + expression + "AND" + expression; + notOpt.Rule = Empty | NOT; + //funCall covers some psedo-operators and special forms like ANY(...), SOME(...), ALL(...), EXISTS(...), IN(...) + funCall.Rule = Id + "(" + funArgs + ")"; + funArgs.Rule = selectStmt | exprList; + inStmt.Rule = expression + "IN" + "(" + exprList + ")"; + + //Operators + RegisterOperators(10, "*", "/", "%"); + RegisterOperators(9, "+", "-"); + RegisterOperators(8, "=", ">", "<", ">=", "<=", "<>", "!=", "!<", "!>", "LIKE", "IN"); + RegisterOperators(7, "^", "&", "|"); + RegisterOperators(6, NOT); + RegisterOperators(5, "AND"); + RegisterOperators(4, "OR"); + + MarkPunctuation(",", "(", ")"); + MarkPunctuation(asOpt, semiOpt); + //Note: we cannot declare binOp as transient because it includes operators "NOT LIKE", "NOT IN" consisting of two tokens. + // Transient non-terminals cannot have more than one non-punctuation child nodes. 
+ // Instead, we set flag InheritPrecedence on binOp , so that it inherits precedence value from it's children, and this precedence is used + // in conflict resolution when binOp node is sitting on the stack + base.MarkTransient(stmt, term, asOpt, aliasOpt, stmtLine, expression, unOp, tuple); + binOp.SetFlag(TermFlags.InheritPrecedence); + }//constructor + }//class +} \ No newline at end of file diff --git a/src/Irony.SampleApp/Sanchime.Irony.SampleApp.csproj b/src/Irony.SampleApp/Sanchime.Irony.SampleApp.csproj new file mode 100644 index 0000000..e6755da --- /dev/null +++ b/src/Irony.SampleApp/Sanchime.Irony.SampleApp.csproj @@ -0,0 +1,13 @@ + + + + Exe + net6.0 + + + + + + + + diff --git a/src/Irony.Tests/CommentTerminalTests.cs b/src/Irony.Tests/CommentTerminalTests.cs new file mode 100644 index 0000000..4cd6c06 --- /dev/null +++ b/src/Irony.Tests/CommentTerminalTests.cs @@ -0,0 +1,24 @@ +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Parsing.Scanners; +using Sanchime.Irony.Parsing.Terminals; +using Xunit; + +namespace Sanchime.Irony.Tests +{ + public class CommentTerminalTests + { + [Fact] + public void TestCommentTerminal() + { + Parser parser; Token token; + + parser = TestHelper.CreateParser(new CommentTerminal("Comment", "/*", "*/")); + token = parser.ParseInput("/* abc */"); + Assert.True(token.Category == TokenCategory.Comment, "Failed to read comment"); + + parser = TestHelper.CreateParser(new CommentTerminal("Comment", "//", "\n")); + token = parser.ParseInput("// abc \n "); + Assert.True(token.Category == TokenCategory.Comment, "Failed to read line comment"); + }//method + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Tests/DataLiteralsTests.cs b/src/Irony.Tests/DataLiteralsTests.cs new file mode 100644 index 0000000..79bd61b --- /dev/null +++ b/src/Irony.Tests/DataLiteralsTests.cs @@ -0,0 +1,52 @@ +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Parsing.Scanners; +using 
Sanchime.Irony.Parsing.Terminals; +using System; +using Xunit; + +namespace Sanchime.Irony.Tests +{ + public class DataLiteralsTests + { + [Fact] + public void TestDataLiterals() + { + Parser parser; Token token; + Terminal term; + + // FixedLengthLiteral --------------------------------------------------------- + term = new FixedLengthLiteral("fixedLengthInteger", 2, TypeCode.Int32); + parser = TestHelper.CreateParser(term, null); + + token = parser.ParseInput("1200"); + Assert.True(token.Value != null, "Failed to parse fixed-length integer."); + Assert.True((int)token.Value == 12, "Failed to parse fixed-length integer - result value does not match."); + + term = new FixedLengthLiteral("fixedLengthString", 2, TypeCode.String); + parser = TestHelper.CreateParser(term); + token = parser.ParseInput("abcd", useTerminator: false); + Assert.True(token != null && token.Value != null, "Failed to parse fixed-length string."); + Assert.True((string)token.Value == "ab", "Failed to parse fixed-length string - result value does not match"); + + // DsvLiteral ---------------------------------------------------------------- + term = new DsvLiteral("DsvInteger", TypeCode.Int32, ","); + parser = TestHelper.CreateParser(term); + token = parser.ParseInput("12,"); + Assert.True(token != null && token.Value != null, "Failed to parse CSV integer."); + Assert.True((int)token.Value == 12, "Failed to parse CSV integer - result value does not match."); + + term = new DsvLiteral("DsvInteger", TypeCode.String, ","); + parser = TestHelper.CreateParser(term); + token = parser.ParseInput("ab,"); + Assert.True(token != null && token.Value != null, "Failed to parse CSV string."); + Assert.True((string)token.Value == "ab", "Failed to parse CSV string - result value does not match."); + + // QuotedValueLiteral ---------------------------------------------------------------- + term = new QuotedValueLiteral("QVDate", "#", TypeCode.DateTime); + parser = TestHelper.CreateParser(term); + token = 
parser.ParseInput("#11/15/2009#"); + Assert.True(token != null && token.Value != null, "Failed to parse quoted date."); + Assert.True((DateTime)token.Value == new DateTime(2009, 11, 15), "Failed to parse quoted date - result value does not match."); + }//method + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Tests/ErrorRecoveryTests.cs b/src/Irony.Tests/ErrorRecoveryTests.cs new file mode 100644 index 0000000..e7f8122 --- /dev/null +++ b/src/Irony.Tests/ErrorRecoveryTests.cs @@ -0,0 +1,47 @@ +using Sanchime.Irony.Parsing.Grammars; +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Parsing.Terminals; +using Xunit; + +namespace Sanchime.Irony.Tests +{ + public class ErrorRecoveryTests + { + #region Grammars + + //A simple grammar for language consisting of simple assignment statements: x=y + z; z= t + m; + public class ErrorRecoveryGrammar : Grammar + { + public ErrorRecoveryGrammar() + { + var id = new IdentifierTerminal("id"); + var expr = new NonTerminal("expr"); + var stmt = new NonTerminal("stmt"); + var stmtList = new NonTerminal("stmt"); + + Root = stmtList; + stmtList.Rule = MakeStarRule(stmtList, stmt); + stmt.Rule = id + "=" + expr + ";"; + stmt.ErrorRule = SyntaxError + ";"; + expr.Rule = id | id + "+" + id; + } + }// class + + #endregion Grammars + + [Fact] + public void TestErrorRecovery() + { + var grammar = new ErrorRecoveryGrammar(); + var parser = new Parser(grammar); + TestHelper.CheckGrammarErrors(parser); + + //correct sample + var parseTree = parser.Parse("x = y; y = z + m; m = n;"); + Assert.False(parseTree.HasErrors(), "Unexpected parse errors in correct source sample."); + + parseTree = parser.Parse("x = y; m = = d ; y = z + m; x = z z; m = n;"); + Assert.True(2 == parseTree.ParserMessages.Count, "Invalid # of errors."); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Tests/EvaluatorTests.cs b/src/Irony.Tests/EvaluatorTests.cs new file mode 100644 index 0000000..de07895 --- 
/dev/null +++ b/src/Irony.Tests/EvaluatorTests.cs @@ -0,0 +1,274 @@ +using Sanchime.Irony.Interpreter._Evaluator; +using Sanchime.Irony.Interpreter.SriptApplication; +using System; +using System.Collections.Generic; +using Xunit; + +namespace Sanchime.Irony.Tests +{ + public class EvaluatorTests + { + [Fact] + public void TestEvaluator_Ops() + { + var eval = new ExpressionEvaluator(); + string script; + object result; + + //Simple computation + script = "2*3"; + result = eval.Evaluate(script); + Assert.Equal(6, result); + + //Using variables + script = @" +x=2 +y=4 +x * y +"; + result = eval.Evaluate(script); + Assert.Equal(8, result); + + //Operator precedence + script = @" +x=2 +y=3 +x + y * 5 +"; + result = eval.Evaluate(script); + Assert.Equal(17, result); + + //parenthesis + script = @" +x=3 +y=2 +1 + (x - y) * 5 +"; + result = eval.Evaluate(script); + Assert.Equal(6, result); + + //strings + script = @" +x='2' +y='3' +x + y + 4 +"; + result = eval.Evaluate(script); + Assert.Equal("234", result); + + //string with embedded expressions + script = @" +x = 4 +y = 7 +'#{x} * #{y} = #{x * y}' +"; + result = eval.Evaluate(script); + Assert.Equal("4 * 7 = 28", result); + + //various operators + script = @" +x = 1 + 2 * 3 # =7 +y = --x # = 6 +z = x * 1.5 # = 9 +z -= y # = 3 +"; + result = eval.Evaluate(script); + Assert.InRange(3.0 - (double)result, -0.0001, 0.0001); + + //&&, || operators + script = @"x = (1 > 0) || (1/0)"; + result = eval.Evaluate(script); + Assert.Equal(true, result); + + //Operator precedence test + script = @"2+3*3*3"; + result = eval.Evaluate(script); + Assert.Equal(29, result); + + script = @"x = (1 < 0) && (1/0)"; + result = eval.Evaluate(script); + Assert.Equal(false, result); + } + + [Fact] + public void TestEvaluator_BuiltIns() + { + var eval = new ExpressionEvaluator(); + string script; + object result; + + //Using methods imported from System.Math class + + //TODO this generates System.Reflection.AmbiguousMatchException + script = 
@"abs(-1.0) + Log10(100.0) + sqrt(9) + floor(4.5) + sin(PI/2)"; + result = eval.Evaluate(script); + Assert.True(result is double, "Result is not double."); + Assert.InRange(11.0 - (double)result, -0.001, 0.001); + + //Using methods imported from System.Environment + script = @"report = '#{MachineName}-#{ProcessorCount}'"; + result = eval.Evaluate(script); + var expected = string.Format("{0}-{1}", Environment.MachineName, Environment.ProcessorCount); + Assert.Equal(expected, result); + + //Using special built-in methods print and format + eval.ClearOutput(); + script = @"print(format('{0} * {1} = {2}', 3, 4, 3 * 4))"; + eval.Evaluate(script); + result = eval.GetOutput(); + Assert.Equal("3 * 4 = 12\r\n", result); + + //Add custom built-in method SayHello and test it + //eval.Runtime.BuiltIns.AddMethod(SayHello, "SayHello", 1, 1, "name"); + script = @"SayHello('John')"; + result = eval.Evaluate(script); + Assert.Equal("Hello, John!", result); + } + + //custom built-in method added to evaluator in Built-in tests + public static string SayHello(ScriptThread thread, object[] args) + { + return "Hello, " + args[0] + "!"; + } + + [Fact] + public void TestEvaluator_Iif() + { + var eval = new ExpressionEvaluator(); + string script; + object result; + + //Test '? :' operator + script = @"1 < 0 ? 
1/0 : 'false' "; // Notice that (1/0) is not evaluated + result = eval.Evaluate(script); + Assert.Equal("false", result); + + //Test iif special form + script = @"iif(1 > 0, 'true', 1/0) "; //Notice that (1/0) is not evaluated + result = eval.Evaluate(script); + Assert.Equal("true", result); + } + + [Fact] + public void TestEvaluator_MemberAccess() + { + var eval = new ExpressionEvaluator(); + eval.Globals["foo"] = new Foo(); + string script; + object result; + + //Test access to field, prop, calling a method + script = @"foo.Field + ',' + foo.Prop + ',' + foo.GetStuff()"; + result = eval.Evaluate(script); + Assert.Equal("F,P,S", result); + + script = @" +foo.Field = 'FF' +foo.Prop = 'PP' +R = foo.Field + foo.Prop "; + result = eval.Evaluate(script); + Assert.Equal("FFPP", result); + + //Test access to indexed properties + + //TODO this generates System.Reflection.AmbiguousMatchException + script = @"foo[3]"; + result = eval.Evaluate(script); + Assert.Equal("#3", result); + + //TODO this generates System.Reflection.AmbiguousMatchException + script = @"foo['a']"; + result = eval.Evaluate(script); + Assert.Equal("V-a", result); + + // Test with string literal + script = @" '0123'.Substring(1) + 'abcd'.Length "; + result = eval.Evaluate(script); + Assert.Equal("1234", result); + } + + //A class used for member access testing + public class Foo + { + public string Field = "F"; + public string Prop { get; set; } + + public Foo() + { + Prop = "P"; + } + + public string GetStuff() + { + return "S"; + } + + public string this[int i] + { + get { return "#" + i; } + set { } + } + + public string this[string key] + { + get { return "V-" + key; } + set { } + } + } + + [Fact] + public void TestEvaluator_ArrayDictDataRow() + { + var eval = new ExpressionEvaluator(); + //Create an array, a dictionary and a data row and add them to Globals + eval.Globals["primes"] = new int[] { 3, 5, 7, 11, 13 }; + var nums = new Dictionary(StringComparer.CurrentCultureIgnoreCase); + nums["one"] = 
"1"; + nums["two"] = "2"; + nums["three"] = "3"; + eval.Globals["nums"] = nums; + //var t = new System.Data.DataTable(); + //t.Columns.Add("Name", typeof(string)); + //t.Columns.Add("Age", typeof(int)); + //var row = t.NewRow(); + var row = new Dictionary(StringComparer.OrdinalIgnoreCase); + row["Name"] = "John"; + row["Age"] = 30; + eval.Globals["row"] = row; + + string script; + object result; + + //Test array + script = @"primes[3]"; + result = eval.Evaluate(script); + Assert.Equal(11, result); + script = @" +primes[3] = 12345 +primes[3]"; + result = eval.Evaluate(script); + Assert.Equal(12345, result); + + //Test dict + script = @"nums['three'] + nums['two'] + nums['one']"; + result = eval.Evaluate(script); + Assert.Equal("321", result); + script = @" +nums['two'] = '22' +nums['three'] + nums['two'] + nums['one'] +"; + result = eval.Evaluate(script); + Assert.Equal("3221", result); + + //Test data row + script = @"row['Name'] + ', ' + row['age']"; + result = eval.Evaluate(script); + Assert.Equal("John, 30", result); + script = @" +row['Name'] = 'Jon' +row['Name'] + ', ' + row['age']"; + result = eval.Evaluate(script); + Assert.Equal("Jon, 30", result); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Tests/FreeTextLiteralTests.cs b/src/Irony.Tests/FreeTextLiteralTests.cs new file mode 100644 index 0000000..ad28bb6 --- /dev/null +++ b/src/Irony.Tests/FreeTextLiteralTests.cs @@ -0,0 +1,90 @@ +using Sanchime.Irony.Parsing.Grammars; +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Parsing.Scanners; +using Sanchime.Irony.Parsing.Terminals; +using Xunit; + +namespace Sanchime.Irony.Tests +{ + public class FreeTextLiteralTests + { + //A special grammar that does not skip whitespace + private class FreeTextLiteralTestGrammar : Grammar + { + public string Terminator = "END"; + + public FreeTextLiteralTestGrammar(Terminal terminal) + : base(caseSensitive: true) + { + var rule = new BnfExpression(terminal); + 
MarkReservedWords(Terminator); + rule += Terminator; + Root = new NonTerminal("Root"); + Root.Rule = rule; + } + + //Overrides base method, effectively suppressing skipping whitespaces + public override void SkipWhitespace(ISourceStream source) + { + return; + } + }//class + + private Parser CreateParser(Terminal terminal) + { + var grammar = new FreeTextLiteralTestGrammar(terminal); + return new Parser(grammar); + } + + private Token GetFirst(ParseTree tree) + { + return tree.Tokens[0]; + } + + //The following test method and a fix are contributed by ashmind codeplex user + [Fact] + public void TestFreeTextLiteral_Escapes() + { + Parser parser; Token token; + + //Escapes test + var term = new FreeTextLiteral("FreeText", ",", ")"); + term.Escapes.Add(@"\\", @"\"); + term.Escapes.Add(@"\,", @","); + term.Escapes.Add(@"\)", @")"); + + parser = CreateParser(term); + token = GetFirst(parser.Parse(@"abc\\de\,\)fg,")); + Assert.False(token == null, "Failed to produce a token on valid string."); + Assert.True(term == token.Terminal, "Failed to scan a string - invalid Terminal in the returned token."); + Assert.True(token.Value.ToString() == @"abc\de,)fg", "Failed to scan a string"); + + term = new FreeTextLiteral("FreeText", FreeTextOptions.AllowEof, ";"); + parser = CreateParser(term); + token = GetFirst(parser.Parse(@"abcdefg")); + Assert.False(token == null, "Failed to produce a token for free text ending at EOF."); + Assert.True(term == token.Terminal, "Failed to scan a free text ending at EOF - invalid Terminal in the returned token."); + Assert.True(token.Value.ToString() == @"abcdefg", "Failed to scan a free text ending at EOF"); + + //The following test method and a fix are contributed by ashmind codeplex user + //VAR + //MESSAGE:STRING80; + //(*_ORError Message*) + //END_VAR + term = new FreeTextLiteral("varContent", "END_VAR"); + term.Firsts.Add("VAR"); + parser = CreateParser(term); + token = GetFirst(parser.Parse("VAR\r\nMESSAGE:STRING80;\r\n(*_ORError 
Message*)\r\nEND_VAR")); + Assert.False(token == null, "Failed to produce a token on valid string."); + Assert.True(term == token.Terminal, "Failed to scan a string - invalid Terminal in the returned token."); + Assert.True(token.ValueString == "\r\nMESSAGE:STRING80;\r\n(*_ORError Message*)\r\n", "Failed to scan a string"); + + term = new FreeTextLiteral("freeText", FreeTextOptions.AllowEof); + parser = CreateParser(term); + token = GetFirst(parser.Parse(" ")); + Assert.False(token == null, "Failed to produce a token on valid string."); + Assert.True(term == token.Terminal, "Failed to scan a string - invalid Terminal in the returned token."); + Assert.True(token.ValueString == " ", "Failed to scan a string"); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Tests/IdentifierTerminalTests.cs b/src/Irony.Tests/IdentifierTerminalTests.cs new file mode 100644 index 0000000..05441cc --- /dev/null +++ b/src/Irony.Tests/IdentifierTerminalTests.cs @@ -0,0 +1,76 @@ +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Parsing.Scanners; +using Sanchime.Irony.Parsing.Terminals; +using Xunit; + +namespace Sanchime.Irony.Tests +{ + public class IdentifierTerminalTests + { + [Fact] + public void TestIdentifier_CSharp() + { + Parser parser; Token token; + + parser = TestHelper.CreateParser(TerminalFactory.CreateCSharpIdentifier("Identifier")); + token = parser.ParseInput("x "); + Assert.True(token.Terminal.Name == "Identifier", "Failed to parse identifier"); + Assert.True((string)token.Value == "x", "Failed to parse identifier"); + token = parser.ParseInput("_a01 "); + Assert.True(token.Terminal.Name == "Identifier", "Failed to parse identifier starting with _"); + Assert.True((string)token.Value == "_a01", "Failed to parse identifier starting with _"); + + token = parser.ParseInput("0abc "); + Assert.True(token.IsError(), "Erroneously recognized an identifier."); + + token = parser.ParseInput(@"_\u0061bc "); + Assert.True(token.Terminal.Name 
== "Identifier", "Failed to parse identifier starting with _"); + Assert.True((string)token.Value == "_abc", "Failed to parse identifier containing escape sequence \\u"); + + token = parser.ParseInput(@"a\U00000062c_ "); + Assert.True(token.Terminal.Name == "Identifier", "Failed to parse identifier starting with _"); + Assert.True((string)token.Value == "abc_", "Failed to parse identifier containing escape sequence \\U"); + }//method + + [Fact] + public void TestIdentifier_CaseRestrictions() + { + Parser parser; Token token; + + var id = new IdentifierTerminal("identifier"); + id.CaseRestriction = CaseRestriction.None; + parser = TestHelper.CreateParser(id); + + token = parser.ParseInput("aAbB"); + Assert.True(token != null, "Failed to scan an identifier aAbB."); + + id.CaseRestriction = CaseRestriction.FirstLower; + parser = TestHelper.CreateParser(id); + token = parser.ParseInput("BCD"); + Assert.True(token.IsError(), "Erroneously recognized an identifier BCD with FirstLower restriction."); + token = parser.ParseInput("bCd "); + Assert.True(token != null && token.ValueString == "bCd", "Failed to scan identifier bCd with FirstLower restriction."); + + id.CaseRestriction = CaseRestriction.FirstUpper; + parser = TestHelper.CreateParser(id); + token = parser.ParseInput("cDE"); + Assert.True(TokenCategory.Error == token.Category, "Erroneously recognized an identifier cDE with FirstUpper restriction."); + token = parser.ParseInput("CdE"); + Assert.True(token != null && token.ValueString == "CdE", "Failed to scan identifier CdE with FirstUpper restriction."); + + id.CaseRestriction = CaseRestriction.AllLower; + parser = TestHelper.CreateParser(id); + token = parser.ParseInput("DeF"); + Assert.True(token.IsError(), "Erroneously recognized an identifier DeF with AllLower restriction."); + token = parser.ParseInput("def"); + Assert.True(token != null && token.ValueString == "def", "Failed to scan identifier def with AllLower restriction."); + + id.CaseRestriction = 
CaseRestriction.AllUpper; + parser = TestHelper.CreateParser(id); + token = parser.ParseInput("EFg "); + Assert.True(token.IsError(), "Erroneously recognized an identifier EFg with AllUpper restriction."); + token = parser.ParseInput("EFG"); + Assert.True(token != null && token.ValueString == "EFG", "Failed to scan identifier EFG with AllUpper restriction."); + }//method + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Tests/IntegrationTests.cs b/src/Irony.Tests/IntegrationTests.cs new file mode 100644 index 0000000..2a266cc --- /dev/null +++ b/src/Irony.Tests/IntegrationTests.cs @@ -0,0 +1,104 @@ +using Sanchime.Irony.Parsing.Data; +using Sanchime.Irony.Parsing.Grammars; +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Parsing.Scanners; +using Sanchime.Irony.Parsing.Terminals; +using Xunit; + +//Tests of Visual Studio integration functionality + +namespace Sanchime.Irony.Tests +{ + public class IntegrationTestGrammar : Grammar + { + public IntegrationTestGrammar() + { + var comment = new CommentTerminal("comment", "/*", "*/"); + NonGrammarTerminals.Add(comment); + var str = new StringLiteral("str", "'", StringOptions.AllowsLineBreak); + var stmt = new NonTerminal("stmt"); + stmt.Rule = str | Empty; + Root = stmt; + } + }//class + + public class IntegrationTests + { + private Grammar _grammar; + private LanguageData _language; + private Scanner _scanner; + private ParsingContext _context; + private int _state; + + private void Init(Grammar grammar) + { + _grammar = grammar; + _language = new LanguageData(_grammar); + var parser = new Parser(_language); + _scanner = parser.Scanner; + _context = parser.Context; + _context.Mode = ParseMode.VsLineScan; + } + + private void SetSource(string text) + { + _scanner.VsSetSource(text, 0); + } + + private Token Read() + { + Token token = _scanner.VsReadToken(ref _state); + return token; + } + + [Fact] + public void TestIntegration_VsScanningComment() + { + Init(new 
IntegrationTestGrammar()); + SetSource(" /* "); + Token token = Read(); + Assert.True(token.IsSet(TokenFlags.IsIncomplete), "Expected incomplete token (line 1)"); + token = Read(); + Assert.True(token == null, "NULL expected"); + SetSource(" comment "); + token = Read(); + Assert.True(token.IsSet(TokenFlags.IsIncomplete), "Expected incomplete token (line 2)"); + token = Read(); + Assert.True(token == null, "NULL expected"); + SetSource(" */ /*x*/"); + token = Read(); + Assert.False(token.IsSet(TokenFlags.IsIncomplete), "Expected complete token (line 3)"); + token = Read(); + Assert.False(token.IsSet(TokenFlags.IsIncomplete), "Expected complete token (line 3)"); + token = Read(); + Assert.True(token == null, "Null expected."); + } + + [Fact] + public void TestIntegration_VsScanningString() + { + Init(new IntegrationTestGrammar()); + SetSource(" 'abc"); + Token token = Read(); + Assert.True(token.ValueString == "abc", "Expected incomplete token 'abc' (line 1)"); + Assert.True(token.IsSet(TokenFlags.IsIncomplete), "Expected incomplete token (line 1)"); + token = Read(); + Assert.True(token == null, "NULL expected"); + SetSource(" def "); + token = Read(); + Assert.True(token.ValueString == " def ", "Expected incomplete token ' def ' (line 2)"); + Assert.True(token.IsSet(TokenFlags.IsIncomplete), "Expected incomplete token (line 2)"); + token = Read(); + Assert.True(token == null, "NULL expected"); + SetSource("ghi' 'x'"); + token = Read(); + Assert.True(token.ValueString == "ghi", "Expected token 'ghi' (line 3)"); + Assert.False(token.IsSet(TokenFlags.IsIncomplete), "Expected complete token (line 3)"); + token = Read(); + Assert.True(token.ValueString == "x", "Expected token 'x' (line 3)"); + Assert.False(token.IsSet(TokenFlags.IsIncomplete), "Expected complete token (line 3)"); + token = Read(); + Assert.True(token == null, "Null expected."); + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony.Tests/LineContinuationTests.cs 
b/src/Irony.Tests/LineContinuationTests.cs new file mode 100644 index 0000000..9059fb6 --- /dev/null +++ b/src/Irony.Tests/LineContinuationTests.cs @@ -0,0 +1,60 @@ +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Parsing.Scanners; +using Sanchime.Irony.Parsing.Terminals; +using Xunit; + +namespace Sanchime.Irony.Tests +{ + public class LineContinuationTests + { + [Fact] + public void TestContinuationTerminal_Simple() + { + Parser parser; Token token; + + parser = TestHelper.CreateParser(new LineContinuationTerminal("LineContinuation", "\\")); + token = parser.ParseInput("\\\r\t"); + Assert.True(token.Category == TokenCategory.Outline, "Failed to read simple line continuation terminal"); + } + + [Fact] + public void TestContinuationTerminal_Default() + { + Parser parser; Token token; + + parser = TestHelper.CreateParser(new LineContinuationTerminal("LineContinuation")); + token = parser.ParseInput("_\r\n\t"); + Assert.True(token.Category == TokenCategory.Outline, "Failed to read default line continuation terminal"); + + token = parser.ParseInput("\\\v "); + Assert.True(token.Category == TokenCategory.Outline, "Failed to read default line continuation terminal"); + } + + [Fact] + public void TestContinuationTerminal_Complex() + { + Parser parser; Token token; + + parser = TestHelper.CreateParser(new LineContinuationTerminal("LineContinuation", @"\continue", @"\cont", "++CONTINUE++")); + token = parser.ParseInput("\\cont \r\n "); + Assert.True(token.Category == TokenCategory.Outline, "Failed to read complex line continuation terminal"); + + token = parser.ParseInput("++CONTINUE++\t\v"); + Assert.True(token.Category == TokenCategory.Outline, "Failed to read complex line continuation terminal"); + } + + [Fact] + public void TestContinuationTerminal_Incomplete() + { + Parser parser; Token token; + + parser = TestHelper.CreateParser(new LineContinuationTerminal("LineContinuation")); + token = parser.ParseInput("\\ garbage"); + Assert.True(token.Category == 
TokenCategory.Error, "Failed to read incomplete line continuation terminal"); + + parser = TestHelper.CreateParser(new LineContinuationTerminal("LineContinuation")); + token = parser.ParseInput("_"); + Assert.True(token.Category == TokenCategory.Error, "Failed to read incomplete line continuation terminal"); + } + } +} \ No newline at end of file diff --git a/src/Irony.Tests/NumberLiteralTests.cs b/src/Irony.Tests/NumberLiteralTests.cs new file mode 100644 index 0000000..0582062 --- /dev/null +++ b/src/Irony.Tests/NumberLiteralTests.cs @@ -0,0 +1,400 @@ +//Authors: Roman Ivantsov, Philipp Serr + +using Sanchime.Irony.Parsing.Parsers; +using Sanchime.Irony.Parsing.Scanners; +using Sanchime.Irony.Parsing.Terminals; +using System; +using Xunit; + +namespace Sanchime.Irony.Tests +{ + public class NumberLiteralTests + { + [Fact] + public void TestNumber_General() + { + Parser parser; Token token; + + NumberLiteral number = new NumberLiteral("Number"); + number.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }; + parser = TestHelper.CreateParser(number); + token = parser.ParseInput("123"); + CheckType(token, typeof(int)); + Assert.True((int)token.Value == 123, "Failed to read int value"); + token = parser.ParseInput("123.4"); + Assert.True(Math.Abs(Convert.ToDouble(token.Value) - 123.4) < 0.000001, "Failed to read float value"); + //100 digits + string sbig = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + token = parser.ParseInput(sbig); + Assert.True(token.Value.ToString() == sbig, "Failed to read big integer value"); + }//method + + //The following "sign" test methods and a fix are contributed by ashmind codeplex user + [Fact] + public void TestNumber_SignedDoesNotMatchSingleMinus() + { + Parser parser; Token token; + + var number = new NumberLiteral("number", NumberOptions.AllowSign); + parser = TestHelper.CreateParser(number); + token = parser.ParseInput("-"); + 
Assert.True(token.IsError(), "Parsed single '-' as a number value."); + } + + [Fact] + public void TestNumber_SignedDoesNotMatchSinglePlus() + { + Parser parser; Token token; + + var number = new NumberLiteral("number", NumberOptions.AllowSign); + parser = TestHelper.CreateParser(number); + token = parser.ParseInput("+"); + Assert.True(token.IsError(), "Parsed single '+' as a number value."); + } + + [Fact] + public void TestNumber_SignedMatchesNegativeCorrectly() + { + Parser parser; Token token; + + var number = new NumberLiteral("number", NumberOptions.AllowSign); + parser = TestHelper.CreateParser(number); + token = parser.ParseInput("-500"); + Assert.Equal(-500, token.Value); + } + + [Fact] + public void TestNumber_CSharp() + { + Parser parser; Token token; + + double eps = 0.0001; + parser = TestHelper.CreateParser(TerminalFactory.CreateCSharpNumber("Number")); + + //Simple integers and suffixes + token = parser.ParseInput("123 "); + CheckType(token, typeof(int)); + Assert.True(token.Details != null, "ScanDetails object not found in token."); + Assert.True((int)token.Value == 123, "Failed to read int value"); + + token = parser.ParseInput(int.MaxValue.ToString()); + CheckType(token, typeof(int)); + Assert.True((int)token.Value == int.MaxValue, "Failed to read Int32.MaxValue."); + + token = parser.ParseInput(ulong.MaxValue.ToString()); + CheckType(token, typeof(ulong)); + Assert.True((ulong)token.Value == ulong.MaxValue, "Failed to read uint64.MaxValue value"); + + token = parser.ParseInput("123U "); + CheckType(token, typeof(uint)); + Assert.True((uint)token.Value == 123, "Failed to read uint value"); + + token = parser.ParseInput("123L "); + CheckType(token, typeof(long)); + Assert.True((long)token.Value == 123, "Failed to read long value"); + + token = parser.ParseInput("123uL "); + CheckType(token, typeof(ulong)); + Assert.True((ulong)token.Value == 123, "Failed to read ulong value"); + + //Hex representation + token = parser.ParseInput("0x012 "); + 
CheckType(token, typeof(int)); + Assert.True((int)token.Value == 0x012, "Failed to read hex int value"); + + token = parser.ParseInput("0x12U "); + CheckType(token, typeof(uint)); + Assert.True((uint)token.Value == 0x012, "Failed to read hex uint value"); + + token = parser.ParseInput("0x012L "); + CheckType(token, typeof(long)); + Assert.True((long)token.Value == 0x012, "Failed to read hex long value"); + + token = parser.ParseInput("0x012uL "); + CheckType(token, typeof(ulong)); + Assert.True((ulong)token.Value == 0x012, "Failed to read hex ulong value"); + + //Floating point types + token = parser.ParseInput("123.4 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #1"); + + token = parser.ParseInput("1234e-1 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 1234e-1) < eps, "Failed to read double value #2"); + + token = parser.ParseInput("12.34e+01 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #3"); + + token = parser.ParseInput("0.1234E3 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #4"); + + token = parser.ParseInput("123.4f "); + CheckType(token, typeof(float)); + Assert.True(Math.Abs((float)token.Value - 123.4) < eps, "Failed to read float(single) value"); + + token = parser.ParseInput("123.4m "); + CheckType(token, typeof(decimal)); + Assert.True(Math.Abs((decimal)token.Value - 123.4m) < Convert.ToDecimal(eps), "Failed to read decimal value"); + + token = parser.ParseInput("123. ", useTerminator: false); //should ignore dot and read number as int. 
compare it to python numbers - see below + CheckType(token, typeof(int)); + Assert.True((int)token.Value == 123, "Failed to read int value with trailing dot"); + + //Quick parse + token = parser.ParseInput("1 "); + CheckType(token, typeof(int)); + //When going through quick parse path (for one-digit numbers), the NumberScanInfo record is not created and hence is absent in Attributes + Assert.True(token.Details == null, "Quick parse test failed: ScanDetails object is found in token - quick parse path should not produce this object."); + Assert.True((int)token.Value == 1, "Failed to read quick-parse value"); + } + + [Fact] + public void TestNumber_VB() + { + Parser parser; Token token; + + double eps = 0.0001; + parser = TestHelper.CreateParser(TerminalFactory.CreateVbNumber("Number")); + + //Simple integer + token = parser.ParseInput("123 "); + CheckType(token, typeof(int)); + Assert.True(token.Details != null, "ScanDetails object not found in token."); + Assert.True((int)token.Value == 123, "Failed to read int value"); + + //Test all suffixes + token = parser.ParseInput("123S "); + CheckType(token, typeof(short)); + Assert.True((short)token.Value == 123, "Failed to read short value"); + + token = parser.ParseInput("123I "); + CheckType(token, typeof(int)); + Assert.True((int)token.Value == 123, "Failed to read int value"); + + token = parser.ParseInput("123% "); + CheckType(token, typeof(int)); + Assert.True((int)token.Value == 123, "Failed to read int value"); + + token = parser.ParseInput("123L "); + CheckType(token, typeof(long)); + Assert.True((long)token.Value == 123, "Failed to read long value"); + + token = parser.ParseInput("123& "); + CheckType(token, typeof(long)); + Assert.True((long)token.Value == 123, "Failed to read long value"); + + token = parser.ParseInput("123us "); + CheckType(token, typeof(ushort)); + Assert.True((ushort)token.Value == 123, "Failed to read ushort value"); + + token = parser.ParseInput("123ui "); + CheckType(token, typeof(uint)); 
+ Assert.True((uint)token.Value == 123, "Failed to read uint value"); + + token = parser.ParseInput("123ul "); + CheckType(token, typeof(ulong)); + Assert.True((ulong)token.Value == 123, "Failed to read ulong value"); + + //Hex and octal + token = parser.ParseInput("&H012 "); + CheckType(token, typeof(int)); + Assert.True((int)token.Value == 0x012, "Failed to read hex int value"); + + token = parser.ParseInput("&H012L "); + CheckType(token, typeof(long)); + Assert.True((long)token.Value == 0x012, "Failed to read hex long value"); + + token = parser.ParseInput("&O012 "); + CheckType(token, typeof(int)); + Assert.True((int)token.Value == 10, "Failed to read octal int value"); //12(oct) = 10(dec) + + token = parser.ParseInput("&o012L "); + CheckType(token, typeof(long)); + Assert.True((long)token.Value == 10, "Failed to read octal long value"); + + //Floating point types + token = parser.ParseInput("123.4 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #1"); + + token = parser.ParseInput("1234e-1 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 1234e-1) < eps, "Failed to read double value #2"); + + token = parser.ParseInput("12.34e+01 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #3"); + + token = parser.ParseInput("0.1234E3 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #4"); + + token = parser.ParseInput("123.4R "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #5"); + + token = parser.ParseInput("123.4# "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #6"); + + token = parser.ParseInput("123.4f "); + CheckType(token, typeof(float)); + 
Assert.True(Math.Abs((float)token.Value - 123.4) < eps, "Failed to read float(single) value"); + + token = parser.ParseInput("123.4! "); + CheckType(token, typeof(float)); + Assert.True(Math.Abs((float)token.Value - 123.4) < eps, "Failed to read float(single) value"); + + token = parser.ParseInput("123.4D "); + CheckType(token, typeof(decimal)); + Assert.True(Math.Abs((decimal)token.Value - 123.4m) < Convert.ToDecimal(eps), "Failed to read decimal value"); + + token = parser.ParseInput("123.4@ "); + CheckType(token, typeof(decimal)); + Assert.True(Math.Abs((decimal)token.Value - 123.4m) < Convert.ToDecimal(eps), "Failed to read decimal value"); + + //Quick parse + token = parser.ParseInput("1 "); + CheckType(token, typeof(int)); + //When going through quick parse path (for one-digit numbers), the NumberScanInfo record is not created and hence is absent in Attributes + Assert.True(token.Details == null, "Quick parse test failed: ScanDetails object is found in token - quick parse path should not produce this object."); + Assert.True((int)token.Value == 1, "Failed to read quick-parse value"); + } + + [Fact] + public void TestNumber_Python() + { + Parser parser; Token token; + + double eps = 0.0001; + parser = TestHelper.CreateParser(TerminalFactory.CreatePythonNumber("Number")); + + //Simple integers and suffixes + token = parser.ParseInput("123 "); + CheckType(token, typeof(int)); + Assert.True(token.Details != null, "ScanDetails object not found in token."); + Assert.True((int)token.Value == 123, "Failed to read int value"); + + token = parser.ParseInput("123L "); + CheckType(token, typeof(long)); + Assert.True((long)token.Value == 123, "Failed to read long value"); + + //Hex representation + token = parser.ParseInput("0x012 "); + CheckType(token, typeof(int)); + Assert.True((int)token.Value == 0x012, "Failed to read hex int value"); + + token = parser.ParseInput("0x012l "); //with small "L" + CheckType(token, typeof(long)); + Assert.True((long)token.Value == 0x012, 
"Failed to read hex long value"); + + //Floating point types + token = parser.ParseInput("123.4 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #1"); + + token = parser.ParseInput("1234e-1 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 1234e-1) < eps, "Failed to read double value #2"); + + token = parser.ParseInput("12.34e+01 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #3"); + + token = parser.ParseInput("0.1234E3 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #4"); + + token = parser.ParseInput(".1234 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 0.1234) < eps, "Failed to read double value with leading dot"); + + token = parser.ParseInput("123. "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.0) < eps, "Failed to read double value with trailing dot"); + + //Big integer + string sbig = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; //100 digits + token = parser.ParseInput(sbig); + Assert.True(token.Value.ToString() == sbig, "Failed to read big integer value"); + + //Quick parse + token = parser.ParseInput("1 "); + CheckType(token, typeof(int)); + Assert.True(token.Details == null, "Quick parse test failed: ScanDetails object is found in token - quick parse path should produce this object."); + Assert.True((int)token.Value == 1, "Failed to read quick-parse value"); + } + + [Fact] + public void TestNumber_Scheme() + { + Parser parser; Token token; + + double eps = 0.0001; + parser = TestHelper.CreateParser(TerminalFactory.CreateSchemeNumber("Number")); + + //Just test default float value (double), and exp symbols (e->double, s->single, d -> double) + token 
= parser.ParseInput("123.4 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #1"); + + token = parser.ParseInput("1234e-1 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 1234e-1) < eps, "Failed to read single value #2"); + + token = parser.ParseInput("1234s-1 "); + CheckType(token, typeof(float)); + Assert.True(Math.Abs((float)token.Value - 1234e-1) < eps, "Failed to read single value #3"); + + token = parser.ParseInput("12.34d+01 "); + CheckType(token, typeof(double)); + Assert.True(Math.Abs((double)token.Value - 123.4) < eps, "Failed to read double value #4"); + }//method + + [Fact] + public void TestNumber_WithUnderscore() + { + Parser parser; Token token; + + var number = new NumberLiteral("number", NumberOptions.AllowUnderscore); + parser = TestHelper.CreateParser(number); + + //Simple integers and suffixes + token = parser.ParseInput("1_234_567"); + CheckType(token, typeof(int)); + Assert.True((int)token.Value == 1234567, "Failed to read int value with underscores."); + }//method + + //There was a bug discovered in NumberLiteral - it cannot parse appropriately the int.MinValue value. + // This test ensures that the issue is fixed. 
/// <summary>
/// Asserts that <paramref name="token"/> exists, carries a value, and that the value's runtime type
/// is exactly <paramref name="type"/>.
/// </summary>
/// <param name="token">Token returned by the parser; must not be null.</param>
/// <param name="type">Expected runtime type of <c>token.Value</c>.</param>
private void CheckType(Token token, Type type)
{
    Assert.False(token == null, "TryMatch returned null, while token was expected.");
    // Fail with a readable message instead of a NullReferenceException from GetType() below.
    Assert.False(token.Value == null, "Token value is null, expected a value of type " + type.ToString() + ".");
    Type vtype = token.Value.GetType();
    Assert.True(vtype == type, "Invalid target type, expected " + type.ToString() + ", found: " + vtype);
}
/// <summary>
/// Parses three expressions with <c>OperatorGrammar</c> (no precedence hints) and checks which
/// operator ends up at the root of the parse tree.
/// </summary>
[Fact]
public void TestOperatorPrecedence()
{
    var parser = new Parser(new OperatorGrammar());
    TestHelper.CheckGrammarErrors(parser);

    // '+' must be the top operator, not '*', regardless of operand order.
    AssertRoot("x + y * z", "binExpr", 1, "+");
    AssertRoot("x * y + z", "binExpr", 1, "+");

    // Expected to be read as -(x*y): the root is the unary expression and its first child is '-'.
    AssertRoot("-x * y", "unExpr", 0, "-");

    // Parses the input, then checks the root term name and the operator found at the given child index.
    void AssertRoot(string input, string rootName, int operatorIndex, string operatorText)
    {
        var tree = parser.Parse(input);
        TestHelper.CheckParseErrors(tree);
        Assert.True(tree.Root != null, "Root not found.");
        Assert.True(tree.Root.Term.Name == rootName, "Expected " + rootName + ".");
        Assert.True(tree.Root.ChildNodes[operatorIndex].Term.Name == operatorText, "Expected " + operatorText + " operator.");
    }
}
/// <summary>
/// Runs the same checks against both hinted grammars (<c>OperatorGrammarHintsOnTerms</c> and
/// <c>OperatorGrammarHintsOnNonTerms</c>): "x + y * z" keeps '+' on top, while "-x * y" is expected
/// to be read as (-x)*y, so the root is a binary expression with '*' as its operator.
/// </summary>
[Fact]
public void TestOperatorPrecedenceWithHints()
{
    var termHintParser = new Parser(new OperatorGrammarHintsOnTerms());
    TestHelper.CheckGrammarErrors(termHintParser);
    AssertBinaryRoot(termHintParser, "x + y * z", "+"); // top operator is '+', not '*'
    AssertBinaryRoot(termHintParser, "-x * y", "*");    // top operator is '*', not unary '-'

    var nonTermHintParser = new Parser(new OperatorGrammarHintsOnNonTerms());
    TestHelper.CheckGrammarErrors(nonTermHintParser);
    AssertBinaryRoot(nonTermHintParser, "x + y * z", "+"); // top operator is '+', not '*'
    AssertBinaryRoot(nonTermHintParser, "-x*y", "*");      // top operator is '*', not unary '-'

    // Parses the input and checks that the root is a binExpr whose middle child is the given operator.
    void AssertBinaryRoot(Parser parser, string input, string operatorText)
    {
        var tree = parser.Parse(input);
        TestHelper.CheckParseErrors(tree);
        Assert.True(tree.Root != null, "Root not found.");
        Assert.True(tree.Root.Term.Name == "binExpr", "Expected binExpr.");
        Assert.True(tree.Root.ChildNodes[1].Term.Name == operatorText, "Expected " + operatorText + " operator.");
    }
}
/// <summary>Scanner test for the <c>RegexLiteral</c> terminal.</summary>
public class RegexLiteralTests
{
    //The following test method and a fix are contributed by ashmind codeplex user
    /// <summary>
    /// Scans a regex literal with escaped backslash and slash plus trailing switches, then checks
    /// that the token value is a <see cref="Regex"/> that matches the expected text at index 2.
    /// </summary>
    [Fact]
    public void TestRegExLiteral()
    {
        var regexTerm = new RegexLiteral("RegEx");
        Parser parser = TestHelper.CreateParser(regexTerm);

        Token token = parser.ParseInput(@"/abc\\\/de/gm ");
        Assert.False(token == null, "Failed to produce a token on valid string.");
        Assert.True(regexTerm == token.Terminal, "Failed to scan a string - invalid Terminal in the returned token.");
        Assert.False(token.Value == null, "Token Value field is null - should be Regex object.");

        var compiled = token.Value as Regex;
        Assert.False(compiled == null, "Failed to create Regex object.");

        // The sample has two leading characters before the text the pattern should match.
        var found = compiled.Match(@"00abc\/de00");
        Assert.True(found.Index == 2, "Failed to match a regular expression");
    }
}//class
/// <summary>
/// Returns <paramref name="input"/> with every single quote replaced by a double quote, so test
/// strings can be written with single quotes.
/// </summary>
private static string ReplaceQuotes(string input) => input.Replace('\'', '"');
/// <summary>
/// Exercises the C# string terminal: trailing double backslash, escaped quote, standard escapes,
/// multi-line error detection, @-strings, \u / \U / \x escapes, and octal escapes.
/// Most inputs are written with single quotes and converted by <c>ReplaceQuotes</c>.
/// </summary>
[Fact]
public void TestString_CSharp()
{
    Parser parser; Token token;

    parser = TestHelper.CreateParser(TerminalFactory.CreateCSharpString("String"));

    token = parser.ParseInput('"' + @"abcd\\" + '"' + "  ");
    Assert.True((string)token.Value == @"abcd\", "Failed to process double escape char at the end of the string.");

    token = parser.ParseInput('"' + @"abcd\\\" + '"' + "efg" + '"' + "   ");
    Assert.True((string)token.Value == @"abcd\" + '"' + "efg", @"Failed to process '\\\ + double-quote' inside the string.");

    //with Escapes
    token = parser.ParseInput(ReplaceQuotes(@"'00\a\b\t\n\v\f\r\'\\00'  "));
    Assert.True((string)token.Value == "00\a\b\t\n\v\f\r\"\\00", "Failed to process escaped characters.");
    token = parser.ParseInput(ReplaceQuotes("'abcd\nefg'  "));
    Assert.True(token.IsError(), "Failed to detect erroneous multi-line string.");
    //with disabled escapes
    token = parser.ParseInput(ReplaceQuotes(@"@'00\a\b\t\n\v\f\r00'  "));
    Assert.True((string)token.Value == @"00\a\b\t\n\v\f\r00", "Failed to process @-string with disabled escapes.");
    token = parser.ParseInput(ReplaceQuotes("@'abc\ndef'  "));
    Assert.True((string)token.Value == "abc\ndef", "Failed to process @-string with linebreak.");
    //Unicode and hex
    token = parser.ParseInput(ReplaceQuotes(@"'abc\u0040def'  "));
    Assert.True((string)token.Value == "abc@def", "Failed to process unicode escape \\u.");
    // 8-digit form: the message names \U so a failure points at the right escape.
    token = parser.ParseInput(ReplaceQuotes(@"'abc\U00000040def'  "));
    Assert.True((string)token.Value == "abc@def", "Failed to process unicode escape \\U.");
    token = parser.ParseInput(ReplaceQuotes(@"'abc\x0040xyz'  "));
    Assert.True((string)token.Value == "abc@xyz", "Failed to process hex escape (4 digits).");
    token = parser.ParseInput(ReplaceQuotes(@"'abc\x040xyz'  "));
    Assert.True((string)token.Value == "abc@xyz", "Failed to process hex escape (3 digits).");
    token = parser.ParseInput(ReplaceQuotes(@"'abc\x40xyz'  "));
    Assert.True((string)token.Value == "abc@xyz", "Failed to process hex escape (2 digits).");
    //octals
    token = parser.ParseInput(ReplaceQuotes(@"'abc\0601xyz'  ")); //the last digit "1" should not be included in octal number
    Assert.True((string)token.Value == "abc01xyz", "Failed to process octal escape (3 + 1 digits).");
    token = parser.ParseInput(ReplaceQuotes(@"'abc\060xyz'  "));
    Assert.True((string)token.Value == "abc0xyz", "Failed to process octal escape (3 digits).");
    token = parser.ParseInput(ReplaceQuotes(@"'abc\60xyz'  "));
    Assert.True((string)token.Value == "abc0xyz", "Failed to process octal escape (2 digits).");
    token = parser.ParseInput(ReplaceQuotes(@"'abc\0xyz'  "));
    Assert.True((string)token.Value == "abc\0xyz", "Failed to process octal escape (1 digit).");
}
/// <summary>
/// Shared helpers for terminal tests: builds a one-terminal grammar, creates a parser for it,
/// and parses input while returning the first scanned token.
/// </summary>
public static class TestHelper
{
    //A skeleton for a grammar with a single terminal, followed by optional terminator
    private class TerminalTestGrammar : Grammar
    {
        public string Terminator;

        public TerminalTestGrammar(Terminal terminal, string terminator = null) : base(caseSensitive: true)
        {
            Terminator = terminator;
            var rootRule = new BnfExpression(terminal);
            if (Terminator != null)
            {
                // The terminator is registered as a reserved word and appended to the root rule.
                MarkReservedWords(Terminator);
                rootRule += Terminator;
            }
            Root = new NonTerminal("Root");
            Root.Rule = rootRule;
        }
    }//class

    /// <summary>Creates a parser for a grammar made of <paramref name="terminal"/> plus an optional terminator word.</summary>
    /// <exception cref="Exception">Thrown by <see cref="CheckGrammarErrors"/> when the grammar has errors.</exception>
    public static Parser CreateParser(Terminal terminal, string terminator = "end")
    {
        var parser = new Parser(new TerminalTestGrammar(terminal, terminator));
        CheckGrammarErrors(parser);
        return parser;
    }

    /// <summary>Throws when the parser's language data contains at least one grammar error.</summary>
    public static void CheckGrammarErrors(Parser parser)
    {
        var grammarErrors = parser.Language.Errors;
        if (grammarErrors.Count <= 0)
            return;
        throw new Exception("Unexpected grammar contains error(s): " + string.Join("\n", grammarErrors));
    }

    /// <summary>Throws when the parse tree reports errors.</summary>
    public static void CheckParseErrors(ParseTree parseTree)
    {
        if (!parseTree.HasErrors())
            return;
        throw new Exception("Unexpected parse error(s): " + string.Join("\n", parseTree.ParserMessages));
    }

    /// <summary>
    /// Parses <paramref name="input"/> (followed by the grammar's terminator when enabled and defined)
    /// and returns the first token. An error token is returned as-is, without further checks.
    /// </summary>
    /// <param name="parser">Parser created by <see cref="CreateParser"/>; its grammar is cast to the test grammar.</param>
    /// <param name="input">Text to scan.</param>
    /// <param name="useTerminator">When true and the grammar has a terminator, it is appended to the input.</param>
    public static Token ParseInput(this Parser parser, string input, bool useTerminator = true)
    {
        var testGrammar = (TerminalTestGrammar)parser.Language.Grammar;
        var appendTerminator = useTerminator & (testGrammar.Terminator != null);
        var text = appendTerminator ? input + " " + testGrammar.Terminator : input;

        var tree = parser.Parse(text);
        //If error, then return this error token, this is probably what is expected.
        var first = tree.Tokens[0];
        if (first.IsError())
            return first;

        //Verify that last or before-last token is a terminator
        if (appendTerminator)
        {
            Assert.True(tree.Tokens.Count >= 2, "Wrong # of tokens - expected at least 2. Input: " + text);
            var tokenCount = tree.Tokens.Count;
            //The last is EOF, the one before last should be a terminator
            Assert.True(testGrammar.Terminator == tree.Tokens[tokenCount - 2].Text, "Input terminator not found in the second token. Input: " + text);
        }
        return tree.Tokens[0];
    }
}//class
/// <summary>
/// Two-alternative grammar (field or property definition) whose modifiers overlap; a
/// <c>ReduceIf</c> hint is placed directly inside the field modifier productions.
/// </summary>
[Language("Grammar with conflicts #2", "1.1", "Conflict grammar with hints added to productions.")]
public class ConflictGrammarWithHintsInRules : Grammar
{
    public ConflictGrammarWithHintsInRules() : base(true)
    {
        var identifier = new IdentifierTerminal("id");

        var definitionNode = new NonTerminal("definition");
        var fieldNode = new NonTerminal("fieldDef");
        var propertyNode = new NonTerminal("propDef");
        var fieldModifierNode = new NonTerminal("fieldModifier");
        var propertyModifierNode = new NonTerminal("propModifier");

        definitionNode.Rule = fieldNode | propertyNode;
        // field:    modifier type name ;
        fieldNode.Rule = fieldModifierNode + identifier + identifier + ";";
        // property: modifier type name { }
        propertyNode.Rule = propertyModifierNode + identifier + identifier + "{" + "}";

        // "public" and "private" appear in both modifier rules, so those two alternatives carry the hint.
        var reduceToFieldHint = ReduceIf(";", comesBefore: "{");
        fieldModifierNode.Rule = "public" + reduceToFieldHint | "private" + reduceToFieldHint | "readonly";
        propertyModifierNode.Rule = ToTerm("public") | "private" | "override";

        Root = definitionNode;
    }
}//class
NonTerminal("fieldModifierList"); + var propModifierList = new NonTerminal("propModifierList"); + var methodModifierList = new NonTerminal("methodModifierList"); + var fieldDef = new NonTerminal("fieldDef"); + var propDef = new NonTerminal("propDef"); + var methodDef = new NonTerminal("methodDef"); + + //Rules + Root = stmtList; + stmtList.Rule = MakePlusRule(stmtList, stmt); + stmt.Rule = fieldDef | propDef | methodDef; + fieldDef.Rule = fieldModifierList + name + name + ";"; + propDef.Rule = propModifierList + name + name + "{" + "}"; + methodDef.Rule = methodModifierList + name + name + "(" + ")" + "{" + "}"; + fieldModifierList.Rule = MakeStarRule(fieldModifierList, fieldModifier); + propModifierList.Rule = MakeStarRule(propModifierList, propModifier); + methodModifierList.Rule = MakeStarRule(methodModifierList, methodModifier); + + fieldModifier.Rule = ToTerm("public") | "private" | "readonly" | "volatile"; + propModifier.Rule = ToTerm("public") | "private" | "readonly" | "override"; + methodModifier.Rule = ToTerm("public") | "private" | "override"; + + // conflict resolution + var fieldHint = new TokenPreviewHint(PreferredActionType.Reduce, thisSymbol: ";", comesBefore: new string[] { "(", "{" }); + fieldModifier.AddHintToAll(fieldHint); + fieldModifierList.AddHintToAll(fieldHint); + var propHint = new TokenPreviewHint(PreferredActionType.Reduce, thisSymbol: "{", comesBefore: new string[] { ";", "(" }); + propModifier.AddHintToAll(propHint); + propModifierList.AddHintToAll(propHint); + + MarkReservedWords("public", "private", "readonly", "volatile", "override"); + } + } +} \ No newline at end of file diff --git a/src/Irony.Tests/TokenPreviewResolution/ConflictResolutionTests.cs b/src/Irony.Tests/TokenPreviewResolution/ConflictResolutionTests.cs new file mode 100644 index 0000000..ce80260 --- /dev/null +++ b/src/Irony.Tests/TokenPreviewResolution/ConflictResolutionTests.cs @@ -0,0 +1,117 @@ +using Sanchime.Irony.Parsing.Grammars; +using 
/// <summary>
/// Without hints the conflict grammar is expected to report grammar errors, and parsing the
/// mixed sample is expected to produce a tree that has errors.
/// </summary>
[Fact]
public void TestConflictGrammarNoHints_HasErrors()
{
    var parser = new Parser(new ConflictGrammarNoHints());
    Assert.True(parser.Language.Errors.Count > 0);

    //Cannot parse mixed list
    var parsed = parser.Parse(MixedListSample);
    Assert.NotNull(parsed);
    Assert.True(parsed.HasErrors());
}
/// <summary>
/// With hints attached via <c>AddHintToAll</c>, the grammar must build without errors and both the
/// field-list sample and the mixed sample must parse into the expected sequence of definitions.
/// </summary>
[Fact]
public void TestConflictGrammar_HintsOnTerms()
{
    var parser = new Parser(new ConflictGrammarWithHintsOnTerms());
    Assert.True(parser.Language.Errors.Count == 0);

    //Field list sample
    AssertDefinitions(FieldListSample, "fieldDef", "fieldDef");

    //Mixed sample
    AssertDefinitions(MixedListSample, "propDef", "fieldDef", "methodDef");

    // Parses the sample and checks the root is a StatementList whose statements wrap the given definitions, in order.
    void AssertDefinitions(string sample, params string[] expectedDefinitions)
    {
        var tree = parser.Parse(sample);
        Assert.NotNull(tree);
        Assert.False(tree.HasErrors());

        Assert.NotNull(tree.Root);
        var rootTerm = tree.Root.Term as NonTerminal;
        Assert.NotNull(rootTerm);
        Assert.Equal("StatementList", rootTerm.Name);

        Assert.Equal(expectedDefinitions.Length, tree.Root.ChildNodes.Count);
        var definitions = tree.Root.ChildNodes.Select(t => t.ChildNodes[0]).ToArray();
        for (var i = 0; i < expectedDefinitions.Length; i++)
            Assert.Equal(expectedDefinitions[i], definitions[i].Term.Name);
    }
}
/// <summary>
/// Walks a parse tree and creates an AST node for each parse node, using either the term's custom
/// <c>NodeCreator</c> or a default creator compiled from the term's <c>NodeType</c>.
/// </summary>
public class AstBuilder
{
    public AstContext Context;

    public AstBuilder(AstContext context)
    {
        Context = context;
    }

    /// <summary>
    /// Builds the AST for a whole parse tree. Does nothing when the tree has no root. Points
    /// <c>Context.Messages</c> at the tree's parser messages, verifies language data on first use,
    /// and stops if the language's error level is <c>GrammarErrorLevel.Error</c>.
    /// </summary>
    public virtual void BuildAst(ParseTree parseTree)
    {
        if (parseTree.Root == null)
            return;
        Context.Messages = parseTree.ParserMessages;
        if (!Context.Language.AstDataVerified)
            VerifyLanguageData();
        if (Context.Language.ErrorLevel == GrammarErrorLevel.Error)
            return;
        BuildAst(parseTree.Root);
    }

    /// <summary>
    /// Ensures every content terminal and non-terminal that needs an AST node has a node creator,
    /// compiling a default creator from the node type where none is set. Terms with no resolvable
    /// node type are reported through <c>Context.AddMessage</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">A resolved node type has no public parameterless constructor.</exception>
    public virtual void VerifyLanguageData()
    {
        var gd = Context.Language.GrammarData;
        //Collect all terminals and non-terminals
        var terms = new BnfTermSet();
        //SL does not understand co/contravariance, so doing merge one-by-one
        foreach (var t in gd.Terminals) terms.Add(t);
        foreach (var t in gd.NonTerminals) terms.Add(t);
        var missingList = new BnfTermList();
        foreach (var term in terms)
        {
            if (term is Terminal terminal && terminal.Category != TokenCategory.Content) continue; //only content terminals
            if (term.Flags.IsSet(TermFlags.NoAstNode)) continue;
            var config = term.AstConfig;
            if (config.NodeCreator != null || config.DefaultNodeCreator != null) continue;
            //We must check NodeType
            if (config.NodeType == null)
                config.NodeType = GetDefaultNodeType(term);
            if (config.NodeType == null)
                missingList.Add(term);
            else
                config.DefaultNodeCreator = CompileDefaultNodeCreator(config.NodeType);
        }
        if (missingList.Count > 0)
            // AST node type is not specified for term {0}. Either assign Term.AstConfig.NodeType, or specify default type(s) in AstBuilder.
            Context.AddMessage(ErrorLevel.Error, SourceLocation.Empty, Resources.ErrNodeTypeNotSetOn, string.Join(", ", missingList));
        Context.Language.AstDataVerified = true;
    }

    /// <summary>Picks the context's default node type for a term: literal, identifier, or general.</summary>
    protected virtual Type GetDefaultNodeType(BnfTerm term) => term switch
    {
        NumberLiteral or StringLiteral => Context.DefaultLiteralNodeType,
        IdentifierTerminal => Context.DefaultIdentifierNodeType,
        _ => Context.DefaultNodeType
    };

    /// <summary>
    /// Builds the AST node for one parse node. Skips terms flagged <c>NoAstNode</c> and nodes that
    /// already have an AST node. Children are processed first unless the term is flagged
    /// <c>AstDelayChildren</c>.
    /// </summary>
    public virtual void BuildAst(ParseTreeNode parseNode)
    {
        var term = parseNode.Term;
        if (term.Flags.IsSet(TermFlags.NoAstNode) || parseNode.AstNode != null) return;
        //children first
        var processChildren = !parseNode.Term.Flags.IsSet(TermFlags.AstDelayChildren) && parseNode.ChildNodes.Count > 0;
        if (processChildren)
        {
            var mappedChildNodes = parseNode.GetMappedChildNodes();
            for (int i = 0; i < mappedChildNodes.Count; i++)
                BuildAst(mappedChildNodes[i]);
        }
        // Create the node
        //We know that either NodeCreator or DefaultNodeCreator is set; VerifyLanguageData creates the DefaultNodeCreator
        var config = term.AstConfig;
        if (config.NodeCreator != null)
        {
            config.NodeCreator(Context, parseNode);
            // We assume that Node creator method creates node and initializes it, so parser does not need to call
            // IAstNodeInit.Init() method on node object. But we do call AstNodeCreated custom event on term.
        }
        else
        {
            //Invoke the default creator compiled when we verified the data
            parseNode.AstNode = config.DefaultNodeCreator();
            //Initialize node
            if (parseNode.AstNode is IAstNodeInit iInit)
                iInit.Init(Context, parseNode);
        }
        //Invoke the event on term
        term.OnAstNodeCreated(parseNode);
    }//method

    //Contributed by William Horner (wmh)
    /// <summary>
    /// Emits a dynamic method that calls the public parameterless constructor of
    /// <paramref name="nodeType"/> and wraps it in a <c>DefaultAstNodeCreator</c> delegate.
    /// </summary>
    /// <exception cref="InvalidOperationException"><paramref name="nodeType"/> has no public parameterless constructor.</exception>
    private DefaultAstNodeCreator CompileDefaultNodeCreator(Type nodeType)
    {
        var constr = nodeType.GetConstructor(Type.EmptyTypes);
        // GetConstructor returns null when no matching public constructor exists; emitting Newobj with
        // a null constructor would fail with an ArgumentNullException that does not name the type.
        if (constr == null)
            throw new InvalidOperationException("AST node type " + nodeType.FullName + " must have a public parameterless constructor.");
        var method = new DynamicMethod("CreateAstNode", nodeType, Type.EmptyTypes);
        var il = method.GetILGenerator();
        il.Emit(OpCodes.Newobj, constr);
        il.Emit(OpCodes.Ret);
        var result = (DefaultAstNodeCreator)method.CreateDelegate(typeof(DefaultAstNodeCreator));
        return result;
    }

    /*
    //A list of of child nodes based on AstPartsMap. By default, the same as ChildNodes
    private ParseTreeNodeList _mappedChildNodes;
    public ParseTreeNodeList MappedChildNodes {
      get {
        if (_mappedChildNodes == null)
          _mappedChildNodes = GetMappedChildNodes();
        return _mappedChildNodes;
      }
    }
    */
}//class
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Ast
+{
+    /// <summary>
+    /// Shared state for one AST-building run: the language, the default node types used when a
+    /// term does not specify its own, a free-form value bag, and the message list.
+    /// </summary>
+    public class AstContext
+    {
+        public readonly LanguageData Language;
+        public Type DefaultNodeType;
+        public Type DefaultLiteralNodeType; //default node type for literals
+        public Type DefaultIdentifierNodeType; //default node type for identifiers
+
+        // NOTE(review): the type arguments were missing here (there is no non-generic Dictionary in the BCL);
+        // <object, object> is assumed - confirm against callers of Values.
+        public Dictionary<object, object> Values = new Dictionary<object, object>();
+        public LogMessageList Messages;
+
+        public AstContext(LanguageData language)
+        {
+            Language = language;
+        }
+
+        /// <summary>
+        /// Appends a message to <see cref="Messages"/>. When <paramref name="args"/> is non-empty the
+        /// message is treated as a <c>string.Format</c> template. <see cref="Messages"/> is not
+        /// initialized by this class and must be assigned before the first call.
+        /// </summary>
+        public void AddMessage(ErrorLevel level, SourceLocation location, string message, params object[] args)
+        {
+            if (args != null && args.Length > 0)
+                message = string.Format(message, args);
+            Messages.Add(new LogMessage(level, location, message, null));
+        }
+    }//class
+}//ns
\ No newline at end of file
diff --git a/src/Irony/Ast/AstExtensions.cs b/src/Irony/Ast/AstExtensions.cs
new file mode 100644
index 0000000..f75a792
--- /dev/null
+++ b/src/Irony/Ast/AstExtensions.cs
@@ -0,0 +1,19 @@
+namespace Sanchime.Irony.Ast
+{
+    public static class AstExtensions
+    {
+        /// <summary>
+        /// Returns the child nodes selected by the term's <c>AstConfig.PartsMap</c>, in map order.
+        /// When the term has no AST config or no map, the node's own <c>ChildNodes</c> list is
+        /// returned as-is (not a copy).
+        /// </summary>
+        public static ParseTreeNodeList GetMappedChildNodes(this ParseTreeNode node)
+        {
+            var term = node.Term;
+            if (!term.HasAstConfig())
+                return node.ChildNodes;
+            var map = term.AstConfig.PartsMap;
+            //If no map then mapped list is the same as original
+            if (map == null) return node.ChildNodes;
+            //Create mapped list; each map entry is an index into ChildNodes
+            var result = new ParseTreeNodeList();
+            foreach (var index in map)
+                result.Add(node.ChildNodes[index]);
+            return result;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Irony/Ast/AstInterfaces.cs b/src/Irony/Ast/AstInterfaces.cs
new file mode 100644
index 0000000..945bcba
--- /dev/null
+++ b/src/Irony/Ast/AstInterfaces.cs
@@ -0,0 +1,43 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+
* for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+using System.Collections;
+
+namespace Sanchime.Irony.Ast
+{
+    // Grammar Explorer uses this interface to discover and display the AST tree after parsing the input
+    // (Grammar Explorer additionally uses ToString method of the node to get the text representation of the node)
+    public interface IBrowsableAstNode
+    {
+        int Position { get; }
+
+        IEnumerable GetChildNodes();
+    }
+
+    // Note that we expect more than one interpreter/AST implementation.
+    // Irony.Interpreter namespace provides just one of them. That's why the following AST interfaces
+    // are here, in top Irony namespace and not in Irony.Interpreter.Ast.
+    // In the future, I plan to introduce advanced interpreter, with its own set of AST classes - it will live
+    // in a separate assembly Irony.Interpreter2.dll.
+
+    // Basic interface for AST nodes; Init method is the chance for AST node to get references to its child nodes, and all
+    // related information gathered during parsing.
+    // Implementing this interface is the minimum required from a custom AST node class to have it initialized by the Irony AST builder.
+    // Alternatively, if your custom AST node class does not implement this interface then you can create
+    // and initialize node instances using AstNodeCreator delegate attached to corresponding non-terminal in your grammar.
+    public interface IAstNodeInit
+    {
+        void Init(AstContext context, ParseTreeNode parseNode);
+    }
+}
\ No newline at end of file
diff --git a/src/Irony/Ast/AstNodeConfig.cs b/src/Irony/Ast/AstNodeConfig.cs
new file mode 100644
index 0000000..3bead84
--- /dev/null
+++ b/src/Irony/Ast/AstNodeConfig.cs
@@ -0,0 +1,59 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Ast
+{
+    public class AstNodeEventArgs : EventArgs // event payload carrying the parse node an AST node was created for
+    {
+        public AstNodeEventArgs(ParseTreeNode parseTreeNode)
+        {
+            ParseTreeNode = parseTreeNode;
+        }
+
+        public readonly ParseTreeNode ParseTreeNode;
+
+        public object AstNode // shortcut for ParseTreeNode.AstNode
+        {
+            get { return ParseTreeNode.AstNode; }
+        }
+    }
+
+    public delegate void AstNodeCreator(AstContext context, ParseTreeNode parseNode); // custom creator; returns nothing
+
+    public delegate object DefaultAstNodeCreator(); // parameterless factory returning a new node instance
+
+    public class AstNodeConfig
+    {
+        public Type NodeType;
+        public object Data; //config data passed to AstNode
+        public AstNodeCreator NodeCreator; // a custom method for creating AST nodes
+        public DefaultAstNodeCreator DefaultNodeCreator; //default method for creating AST nodes; compiled dynamic method, wrapper around "new nodeType();"
+
+        // An optional map (selector, filter) of child AST nodes. This facility provides a way to adjust the "map" of child nodes in various languages to
+        // the structure of standard AST nodes (that can be shared between languages).
+        // A ParseTreeNode exposes its ChildNodes list; the mapped list is obtained with the GetMappedChildNodes() extension method.
+        // If term.AstConfig.PartsMap is null, the mapped list is the ChildNodes list itself and contains all child nodes.
+        // If PartsMap is not null, then the mapped list will contain the child nodes identified by the indexes in the map.
+        // For example, if we set
+        //           term.AstConfig.PartsMap = new int[] {1, 4, 2};
+        // then the mapped list will contain 3 child nodes, which are under indexes 1, 4, 2 in ChildNodes list.
+        // The mapping is performed by the AstExtensions.GetMappedChildNodes extension method.
+        public int[] PartsMap;
+
+        public bool CanCreateNode() // true when either a custom creator or a node type is configured
+        {
+            return NodeCreator != null || NodeType != null;
+        }
+    }//AstNodeConfig class
+}
\ No newline at end of file
diff --git a/src/Irony/GlobalUsings.cs b/src/Irony/GlobalUsings.cs
new file mode 100644
index 0000000..d52cc78
--- /dev/null
+++ b/src/Irony/GlobalUsings.cs
@@ -0,0 +1,8 @@
+global using Sanchime.Irony.Parsing.Data;
+global using Sanchime.Irony.Parsing.Grammars;
+global using Sanchime.Irony.Parsing.Parsers;
+global using Sanchime.Irony.Parsing.Scanners;
+global using Sanchime.Irony.Parsing.Terminals;
+global using Sanchime.Irony.Utilities;
+global using System.Text;
+global using System.Text.RegularExpressions;
\ No newline at end of file
diff --git a/src/Irony/Parsing/Data/Construction/GrammarDataBuilder.cs b/src/Irony/Parsing/Data/Construction/GrammarDataBuilder.cs
new file mode 100644
index 0000000..f1b8e71
--- /dev/null
+++ b/src/Irony/Parsing/Data/Construction/GrammarDataBuilder.cs
@@ -0,0 +1,316 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Data
+{
+    /// <summary>
+    /// Fills the language's <c>GrammarData</c> from its <c>Grammar</c>: augmented roots, the set of
+    /// all terms, terminal/non-terminal lists, productions with their LR0 items, nullability flags,
+    /// and finally grammar validation.
+    /// </summary>
+    internal class GrammarDataBuilder
+    {
+        private LanguageData _language;
+        private Grammar _grammar;
+        private GrammarData _grammarData;
+        private int _unnamedCount; //internal counter for generating names for unnamed non-terminals
+        internal int _lastItemId; //each LR0Item gets its unique ID, last assigned (max) Id is kept in this field
+
+        internal GrammarDataBuilder(LanguageData language)
+        {
+            _language = language;
+            _grammar = _language.Grammar;
+        }
+
+        /// <summary>Runs all construction steps in order; later steps rely on data produced by earlier ones.</summary>
+        internal void Build()
+        {
+            _grammarData = _language.GrammarData;
+            CreateAugmentedRoots();
+            CollectTermsFromGrammar();
+            InitTermLists();
+            FillOperatorReportGroup();
+            CreateProductions();
+            ComputeNonTerminalsNullability(_grammarData);
+            ComputeTailsNullability(_grammarData);
+            ValidateGrammar();
+        }
+
+        // Creates the augmented root for the main grammar root and for every snippet root.
+        private void CreateAugmentedRoots()
+        {
+            _grammarData.AugmentedRoot = CreateAugmentedRoot(_grammar.Root);
+            foreach (var snippetRoot in _grammar.SnippetRoots)
+                _grammarData.AugmentedSnippetRoots.Add(CreateAugmentedRoot(snippetRoot));
+        }
+
+        // Builds the non-terminal Root' -> Root Eof for the given root.
+        private NonTerminal CreateAugmentedRoot(NonTerminal root)
+        {
+            var result = new NonTerminal(root.Name + "'", root + _grammar.Eof);
+            result.SetFlag(TermFlags.NoAstNode); //mark that we don't need AST node here
+            return result;
+        }
+
+        // Rebuilds AllTerms: non-grammar terminals, everything reachable from the augmented roots, and SyntaxError.
+        private void CollectTermsFromGrammar()
+        {
+            _unnamedCount = 0;
+            _grammarData.AllTerms.Clear();
+            //Start with NonGrammarTerminals, and set IsNonGrammar flag
+            foreach (Terminal t in _grammarData.Grammar.NonGrammarTerminals)
+            {
+                t.SetFlag(TermFlags.IsNonGrammar);
+                _grammarData.AllTerms.Add(t);
+            }
+            //Add main root
+            CollectTermsRecursive(_grammarData.AugmentedRoot);
+            foreach (var augmRoot in _grammarData.AugmentedSnippetRoots)
+                CollectTermsRecursive(augmRoot);
+            //Add syntax error explicitly
+            _grammarData.AllTerms.Add(_grammar.SyntaxError);
+        }
+
+        // Adds the term and, for non-terminals, everything its rule references. Unnamed non-terminals get a
+        // name here, and nested expressions inside a rule are replaced in place by new non-terminals.
+        private void CollectTermsRecursive(BnfTerm term)
+        {
+            if (_grammarData.AllTerms.Contains(term)) return;
+            _grammarData.AllTerms.Add(term);
+            if (term is not NonTerminal nt) return;
+
+            if (string.IsNullOrEmpty(nt.Name))
+            {
+                if (nt.Rule != null && !string.IsNullOrEmpty(nt.Rule.Name))
+                    nt.Name = nt.Rule.Name;
+                else
+                    nt.Name = "Unnamed" + _unnamedCount++;
+            }
+            if (nt.Rule == null)
+                _language.Errors.AddAndThrow(GrammarErrorLevel.Error, null, Resources.ErrNtRuleIsNull, nt.Name);
+            //check all child elements
+            foreach (BnfTermList elemList in nt.Rule.Data)
+                for (int i = 0; i < elemList.Count; i++)
+                {
+                    BnfTerm child = elemList[i];
+                    if (child == null)
+                    {
+                        _language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrRuleContainsNull, nt.Name, i);
+                        continue; //for i loop
+                    }
+                    //Check for nested expression - convert to non-terminal
+                    if (child is BnfExpression expr)
+                    {
+                        child = new NonTerminal(null, expr);
+                        elemList[i] = child;
+                    }
+                    CollectTermsRecursive(child);
+                }//for i
+        }//method
+
+        // Adds all operator terminals to the first report group of type Operator (only the first one is filled).
+        private void FillOperatorReportGroup()
+        {
+            foreach (var group in _grammar.TermReportGroups)
+                if (group.GroupType == TermReportGroupType.Operator)
+                {
+                    foreach (var term in _grammarData.Terminals)
+                        if (term.Flags.IsSet(TermFlags.IsOperator))
+                            group.Terminals.Add(term);
+                    return;
+                }
+        }
+
+        // Splits AllTerms into the Terminals and NonTerminals lists, marks keywords, and initializes every term.
+        private void InitTermLists()
+        {
+            //Collect terminals and NonTerminals
+            var empty = _grammar.Empty;
+            foreach (BnfTerm term in _grammarData.AllTerms)
+            {  //remember - we may have hints, so it's not only terminals and non-terminals
+                if (term is NonTerminal nonTerminal) _grammarData.NonTerminals.Add(nonTerminal);
+                if (term is Terminal terminal && term != empty) _grammarData.Terminals.Add(terminal);
+            }
+            //Mark keywords - any "word" symbol directly mentioned in the grammar
+            foreach (var term in _grammarData.Terminals)
+            {
+                if (term is not KeyTerm symTerm) continue;
+                if (!string.IsNullOrEmpty(symTerm.Text) && char.IsLetter(symTerm.Text[0]))
+                    symTerm.SetFlag(TermFlags.IsKeyword);
+            }//foreach term
+            //Init all terms
+            foreach (var term in _grammarData.AllTerms)
+                term.Init(_grammarData);
+        }//method
+
+        // Recreates the productions of every non-terminal from its Rule and (if present) ErrorRule.
+        private void CreateProductions()
+        {
+            _lastItemId = 0;
+            // Nothing in this loop adds to NonTerminals, so enumerating it with foreach is safe.
+            foreach (var nt in _grammarData.NonTerminals)
+            {
+                nt.Productions.Clear();
+                //Get data (sequences) from both Rule and ErrorRule
+                BnfExpressionData allData = new BnfExpressionData();
+                allData.AddRange(nt.Rule.Data);
+                if (nt.ErrorRule != null)
+                    allData.AddRange(nt.ErrorRule.Data);
+                //actually create productions for each sequence
+                foreach (BnfTermList prodOperands in allData)
+                {
+                    Production prod = CreateProduction(nt, prodOperands);
+                    nt.Productions.Add(prod);
+                } //foreach prodOperands
+            }
+        }
+
+        // Creates one production with its LR0 items. The Empty terminal is skipped; grammar hints are not
+        // operands - they are attached to the LR0 item of the next real operand (or to the final item).
+        private Production CreateProduction(NonTerminal lvalue, BnfTermList operands)
+        {
+            Production prod = new Production(lvalue);
+            GrammarHintList hints = null;
+            //create RValues list skipping Empty terminal and collecting grammar hints
+            foreach (BnfTerm operand in operands)
+            {
+                if (operand == _grammar.Empty)
+                    continue;
+                //Collect hints as we go - they will be added to the next non-hint element
+                if (operand is GrammarHint hint)
+                {
+                    if (hints == null) hints = new GrammarHintList();
+                    hints.Add(hint);
+                    continue;
+                }
+                //Add the operand and create LR0 Item
+                prod.RValues.Add(operand);
+                prod.LR0Items.Add(new LR0Item(_lastItemId++, prod, prod.RValues.Count - 1, hints));
+                hints = null;
+            }//foreach operand
+            //set the flags; ComputeProductionFlags resets Flags, so it has to run BEFORE IsEmpty is added,
+            // otherwise the IsEmpty flag would be wiped out
+            ComputeProductionFlags(prod);
+            if (prod.RValues.Count == 0)
+                prod.Flags |= ProductionFlags.IsEmpty;
+            //Add final LRItem
+            prod.LR0Items.Add(new LR0Item(_lastItemId++, prod, prod.RValues.Count, hints));
+            return prod;
+        }
+
+        // Resets the production flags and sets HasTerminals / IsError from the production's operands.
+        private void ComputeProductionFlags(Production production)
+        {
+            production.Flags = ProductionFlags.None;
+            foreach (var rv in production.RValues)
+            {
+                //Check if it is a Terminal or Error element
+                if (rv is not Terminal t) continue;
+                production.Flags |= ProductionFlags.HasTerminals;
+                if (t.Category == TokenCategory.Error) production.Flags |= ProductionFlags.IsError;
+            }//foreach
+        }//method
+
+        // Repeats nullability passes over the still-undecided non-terminals until a pass decides nothing new.
+        private static void ComputeNonTerminalsNullability(GrammarData data)
+        {
+            var undecided = data.NonTerminals;
+            while (undecided.Count > 0)
+            {
+                var newUndecided = new NonTerminalSet();
+                foreach (NonTerminal nt in undecided)
+                    if (!ComputeNullability(nt))
+                        newUndecided.Add(nt);
+                if (undecided.Count == newUndecided.Count) return;  //we didn't decide on any new, so we're done
+                undecided = newUndecided;
+            }//while
+        }
+
+        // Returns true (and sets IsNullable) if the non-terminal is known to be nullable at this point;
+        // false means "cannot decide yet", not "not nullable".
+        private static bool ComputeNullability(NonTerminal nonTerminal)
+        {
+            foreach (Production prod in nonTerminal.Productions)
+            {
+                if (prod.RValues.Count == 0)
+                {
+                    nonTerminal.SetFlag(TermFlags.IsNullable);
+                    return true; //decided, Nullable
+                }//if
+                //If production has terminals, it is not nullable and cannot contribute to nullability
+                if (prod.Flags.IsSet(ProductionFlags.HasTerminals)) continue;
+                //Go thru all elements of production and check nullability
+                bool allNullable = true;
+                foreach (BnfTerm child in prod.RValues)
+                {
+                    allNullable &= child.Flags.IsSet(TermFlags.IsNullable);
+                }//foreach child
+                if (allNullable)
+                {
+                    nonTerminal.SetFlag(TermFlags.IsNullable);
+                    return true;
+                }
+            }//foreach prod
+            return false; //cannot decide
+        }
+
+        // Walks each production's LR0 items from the end, marking TailIsNullable until (and including)
+        // the first item whose current term is not nullable.
+        private static void ComputeTailsNullability(GrammarData data)
+        {
+            foreach (var nt in data.NonTerminals)
+            {
+                foreach (var prod in nt.Productions)
+                {
+                    var count = prod.LR0Items.Count;
+                    for (int i = count - 1; i >= 0; i--)
+                    {
+                        var item = prod.LR0Items[i];
+                        item.TailIsNullable = true;
+                        if (item.Current == null) continue;
+                        if (!item.Current.Flags.IsSet(TermFlags.IsNullable))
+                            break; //for i
+                    }//for i
+                }//foreach prod
+            }
+        }
+
+        #region Grammar Validation
+
+        // Reports transient list non-terminals, transient non-terminals with more than one non-punctuation
+        // term in a production, and error productions that do not end with a regular terminal.
+        private void ValidateGrammar()
+        {
+            var invalidTransSet = new NonTerminalSet();
+            foreach (var nt in _grammarData.NonTerminals)
+            {
+                if (nt.Flags.IsSet(TermFlags.IsTransient))
+                {
+                    //List non-terminals cannot be marked transient - otherwise there may be some ambiguities and inconsistencies
+                    if (nt.Flags.IsSet(TermFlags.IsList))
+                        _language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrListCannotBeTransient, nt.Name);
+                    //Count number of non-punctuation child nodes in each production
+                    foreach (var prod in nt.Productions)
+                        if (CountNonPunctuationTerms(prod) > 1) invalidTransSet.Add(nt);
+                }//if transient
+                //Validate error productions
+                foreach (var prod in nt.Productions)
+                    if (prod.Flags.IsSet(ProductionFlags.IsError))
+                    {
+                        var lastTerm = prod.RValues[prod.RValues.Count - 1];
+                        if (!(lastTerm is Terminal) || lastTerm == _grammar.SyntaxError)
+                            _language.Errors.Add(GrammarErrorLevel.Warning, null, Resources.ErrLastTermOfErrorProd, nt.Name);
+                        // "The last term of error production must be a terminal. NonTerminal: {0}"
+                    }//foreach prod
+            }//foreach nt
+
+            if (invalidTransSet.Count > 0)
+                _language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrTransientNtMustHaveOneTerm, invalidTransSet.ToString());
+        }//method
+
+        // Number of operands in the production that are not flagged IsPunctuation.
+        private int CountNonPunctuationTerms(Production production)
+        {
+            int count = 0;
+            foreach (var rvalue in production.RValues)
+                if (!rvalue.Flags.IsSet(TermFlags.IsPunctuation)) count++;
+            return count;
+        }
+
+        #endregion
+    }//class
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Data/Construction/LanguageDataBuilder.cs b/src/Irony/Parsing/Data/Construction/LanguageDataBuilder.cs
new file mode 100644
index 0000000..a25d71a
--- /dev/null
+++ b/src/Irony/Parsing/Data/Construction/LanguageDataBuilder.cs
@@ -0,0 +1,71 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+using System.Diagnostics;
+
+namespace Sanchime.Irony.Parsing.Data
+{
+    /// <summary>
+    /// Drives construction of all language data (grammar data, scanner data, parser data) and
+    /// records the resulting error level and construction time on the <c>LanguageData</c>.
+    /// </summary>
+    internal class LanguageDataBuilder
+    {
+        internal LanguageData Language;
+        private Grammar _grammar;
+
+        public LanguageDataBuilder(LanguageData language)
+        {
+            Language = language;
+            _grammar = Language.Grammar;
+        }
+
+        /// <summary>
+        /// Builds the language data. Returns false when a <c>GrammarErrorException</c> aborted the
+        /// build; <c>ErrorLevel</c> and <c>ConstructionTime</c> are updated in either case.
+        /// </summary>
+        public bool Build()
+        {
+            var sw = new Stopwatch();
+            try
+            {
+                if (_grammar.Root == null)
+                    Language.Errors.AddAndThrow(GrammarErrorLevel.Error, null, Resources.ErrRootNotSet);
+                sw.Start();
+                var gbld = new GrammarDataBuilder(Language);
+                gbld.Build();
+                //Just in case grammar author wants to customize something...
+                _grammar.OnGrammarDataConstructed(Language);
+                var sbld = new ScannerDataBuilder(Language);
+                sbld.Build();
+                var pbld = new ParserDataBuilder(Language);
+                pbld.Build();
+                Validate();
+                //call grammar method, a chance to tweak the automaton
+                _grammar.OnLanguageDataConstructed(Language);
+                return true;
+            }
+            catch (GrammarErrorException)
+            {
+                return false; //grammar error should be already added to Language.Errors collection
+            }
+            finally
+            {
+                Language.ErrorLevel = Language.Errors.GetMaxLevel();
+                sw.Stop();
+                Language.ConstructionTime = sw.ElapsedMilliseconds;
+            }
+        }
+
+        #region Language Data Validation
+
+        // Placeholder: no language-level validation is performed at the moment.
+        private void Validate()
+        {
+        }//method
+
+        #endregion
+    }//class
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Data/Construction/ParserDataBuilder.cs b/src/Irony/Parsing/Data/Construction/ParserDataBuilder.cs
new file mode 100644
index 0000000..d45eea2
--- /dev/null
+++ b/src/Irony/Parsing/Data/Construction/ParserDataBuilder.cs
@@ -0,0 +1,472 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+using Sanchime.Irony.Parsing.Parsers.ParserActions;
+using Sanchime.Irony.Parsing.Parsers.SpecialActionsHints;
+
+namespace Sanchime.Irony.Parsing.Data
+{
+    // Methods constructing LALR automaton.
+ // See _about_parser_construction.txt file in this folder for important comments + + internal partial class ParserDataBuilder + { + private LanguageData _language; + private ParserData _data; + private Grammar _grammar; + private ParserStateHash _stateHash = new ParserStateHash(); + + internal ParserDataBuilder(LanguageData language) + { + _language = language; + _grammar = _language.Grammar; + } + + public void Build() + { + _stateHash.Clear(); + _data = _language.ParserData; + CreateParserStates(); + var itemsNeedLookaheads = GetReduceItemsInInadequateState(); + ComputeTransitions(itemsNeedLookaheads); + ComputeLookaheads(itemsNeedLookaheads); + ComputeStatesExpectedTerminals(); + ComputeConflicts(); + ApplyHints(); + HandleUnresolvedConflicts(); + CreateRemainingReduceActions(); + //Create error action - if it is not created yet by some hint or custom code + if (_data.ErrorAction == null) + _data.ErrorAction = new ErrorRecoveryParserAction(); + }//method + + #region Creating parser states + + private void CreateParserStates() + { + var grammarData = _language.GrammarData; + + //1. Base automaton: create states for main augmented root for the grammar + _data.InitialState = CreateInitialState(grammarData.AugmentedRoot); + ExpandParserStateList(0); + CreateAcceptAction(_data.InitialState, grammarData.AugmentedRoot); + + //2. 
Expand automaton: add parser states from additional roots + foreach (var augmRoot in grammarData.AugmentedSnippetRoots) + { + var initialState = CreateInitialState(augmRoot); + ExpandParserStateList(_data.States.Count - 1); //start with just added state - it is the last state in the list + CreateAcceptAction(initialState, augmRoot); + } + } + + private void CreateAcceptAction(ParserState initialState, NonTerminal augmentedRoot) + { + var root = augmentedRoot.Productions[0].RValues[0]; + var shiftAction = initialState.Actions[root] as ShiftParserAction; + var shiftOverRootState = shiftAction.NewState; + shiftOverRootState.Actions[_grammar.Eof] = new AcceptParserAction(); + } + + private ParserState CreateInitialState(NonTerminal augmentedRoot) + { + //for an augmented root there is an initial production "Root' -> .Root"; so we need the LR0 item at 0 index + var iniItemSet = new LR0ItemSet(); + iniItemSet.Add(augmentedRoot.Productions[0].LR0Items[0]); + var initialState = FindOrCreateState(iniItemSet); + var rootNt = augmentedRoot.Productions[0].RValues[0] as NonTerminal; + _data.InitialStates[rootNt] = initialState; + return initialState; + } + + private void ExpandParserStateList(int initialIndex) + { + // Iterate through states (while new ones are created) and create shift transitions and new states + for (int index = initialIndex; index < _data.States.Count; index++) + { + var state = _data.States[index]; + //Get all possible shifts + foreach (var term in state.BuilderData.ShiftTerms) + { + var shiftItems = state.BuilderData.ShiftItems.SelectByCurrent(term); + //Get set of shifted cores and find/create target state + var shiftedCoreItems = shiftItems.GetShiftedCores(); + var newState = FindOrCreateState(shiftedCoreItems); + //Create shift action + var newAction = new ShiftParserAction(term, newState); + state.Actions[term] = newAction; + //Link items in old/new states + foreach (var shiftItem in shiftItems) + { + shiftItem.ShiftedItem = 
newState.BuilderData.AllItems.FindByCore(shiftItem.Core.ShiftedItem); + }//foreach shiftItem + }//foreach term + } //for index + }//method + + private ParserState FindOrCreateState(LR0ItemSet coreItems) + { + string key = ComputeLR0ItemSetKey(coreItems); + ParserState state; + if (_stateHash.TryGetValue(key, out state)) + return state; + //create new state + state = new ParserState("S" + _data.States.Count); + state.BuilderData = new ParserStateData(state, coreItems); + _data.States.Add(state); + _stateHash[key] = state; + return state; + } + + #endregion + + #region Compute transitions, lookbacks, lookaheads + + //We compute only transitions that are really needed to compute lookaheads in inadequate states. + // We start with reduce items in inadequate state and find their lookbacks - this is initial list of transitions. + // Then for each transition in the list we check if it has items with nullable tails; for those items we compute + // lookbacks - these are new or already existing transitons - and so on, we repeat the operation until no new transitions + // are created. 
+ private void ComputeTransitions(LRItemSet forItems) + { + var newItemsNeedLookbacks = forItems; + while (newItemsNeedLookbacks.Count > 0) + { + var newTransitions = CreateLookbackTransitions(newItemsNeedLookbacks); + newItemsNeedLookbacks = SelectNewItemsThatNeedLookback(newTransitions); + } + } + + private LRItemSet SelectNewItemsThatNeedLookback(TransitionList transitions) + { + //Select items with nullable tails that don't have lookbacks yet + var items = new LRItemSet(); + foreach (var trans in transitions) + foreach (var item in trans.Items) + if (item.Core.TailIsNullable && item.Lookbacks.Count == 0) //only if it does not have lookbacks yet + items.Add(item); + return items; + } + + private LRItemSet GetReduceItemsInInadequateState() + { + var result = new LRItemSet(); + foreach (var state in _data.States) + { + if (state.BuilderData.IsInadequate) + result.UnionWith(state.BuilderData.ReduceItems); + } + return result; + } + + private TransitionList CreateLookbackTransitions(LRItemSet sourceItems) + { + var newTransitions = new TransitionList(); + //Build set of initial cores - this is optimization for performance + //We need to find all initial items in all states that shift into one of sourceItems + // Each such initial item would have the core from the "initial" cores set that we build from source items. + var iniCores = new LR0ItemSet(); + foreach (var sourceItem in sourceItems) + iniCores.Add(sourceItem.Core.Production.LR0Items[0]); + //find + foreach (var state in _data.States) + { + foreach (var iniItem in state.BuilderData.InitialItems) + { + if (!iniCores.Contains(iniItem.Core)) continue; + var iniItemNt = iniItem.Core.Production.LValue; // iniItem's non-terminal (left side of production) + Transition lookback = null; // local var for lookback - transition over iniItemNt + var currItem = iniItem; // iniItem is initial item for all currItem's in the shift chain. 
+ while (currItem != null) + { + if (sourceItems.Contains(currItem)) + { + // We create transitions lazily, only when we actually need them. Check if we have iniItem's transition + // in local variable; if not, get it from state's transitions table; if not found, create it. + if (lookback == null && !state.BuilderData.Transitions.TryGetValue(iniItemNt, out lookback)) + { + lookback = new Transition(state, iniItemNt); + newTransitions.Add(lookback); + } + //Now for currItem, either add trans to Lookbacks, or "include" it into currItem.Transition + // We need lookbacks ONLY for final items; for non-Final items we need proper Include lists on transitions + if (currItem.Core.IsFinal) + currItem.Lookbacks.Add(lookback); + else // if (currItem.Transition != null) + // Note: looks like checking for currItem.Transition is redundant - currItem is either: + // - Final - always the case for the first run of this method; + // - it has a transition after the first run, due to the way we select sourceItems list + // in SelectNewItemsThatNeedLookback (by transitions) + currItem.Transition.Include(lookback); + }//if + //move to next item + currItem = currItem.ShiftedItem; + }//while + }//foreach iniItem + }//foreach state + return newTransitions; + } + + private void ComputeLookaheads(LRItemSet forItems) + { + foreach (var reduceItem in forItems) + { + // Find all source states - those that contribute lookaheads + var sourceStates = new ParserStateSet(); + foreach (var lookbackTrans in reduceItem.Lookbacks) + { + sourceStates.Add(lookbackTrans.ToState); + sourceStates.UnionWith(lookbackTrans.ToState.BuilderData.ReadStateSet); + foreach (var includeTrans in lookbackTrans.Includes) + { + sourceStates.Add(includeTrans.ToState); + sourceStates.UnionWith(includeTrans.ToState.BuilderData.ReadStateSet); + }//foreach includeTrans + }//foreach lookbackTrans + //Now merge all shift terminals from all source states + foreach (var state in sourceStates) + 
reduceItem.Lookaheads.UnionWith(state.BuilderData.ShiftTerminals); + //Remove SyntaxError - it is pseudo terminal + if (reduceItem.Lookaheads.Contains(_grammar.SyntaxError)) + reduceItem.Lookaheads.Remove(_grammar.SyntaxError); + //Sanity check + if (reduceItem.Lookaheads.Count == 0) + _language.Errors.Add(GrammarErrorLevel.InternalError, reduceItem.State, "Reduce item '{0}' in state {1} has no lookaheads.", reduceItem.Core, reduceItem.State); + }//foreach reduceItem + }//method + + #endregion + + #region Analyzing and resolving conflicts + + private void ComputeConflicts() + { + foreach (var state in _data.States) + { + if (!state.BuilderData.IsInadequate) + continue; + //first detect conflicts + var stateData = state.BuilderData; + stateData.Conflicts.Clear(); + var allLkhds = new BnfTermSet(); + //reduce/reduce -------------------------------------------------------------------------------------- + foreach (var item in stateData.ReduceItems) + { + foreach (var lkh in item.Lookaheads) + { + if (allLkhds.Contains(lkh)) + state.BuilderData.Conflicts.Add(lkh); + allLkhds.Add(lkh); + }//foreach lkh + }//foreach item + + //shift/reduce --------------------------------------------------------------------------------------- + foreach (var term in stateData.ShiftTerminals) + if (allLkhds.Contains(term)) + { + stateData.Conflicts.Add(term); + } + } + }//method + + private void ApplyHints() + { + foreach (var state in _data.States) + { + var stateData = state.BuilderData; + //Add automatic precedence hints + if (stateData.Conflicts.Count > 0) + foreach (var conflict in stateData.Conflicts.ToList()) + if (conflict.Flags.IsSet(TermFlags.IsOperator)) + { + //Find any reduce item with this lookahead and add PrecedenceHint + var reduceItem = stateData.ReduceItems.SelectByLookahead(conflict).First(); + var precHint = new PrecedenceHint(); + reduceItem.Core.Hints.Add(precHint); + } + // Apply (activate) hints - these should resolve conflicts as well + foreach (var item in 
state.BuilderData.AllItems) + foreach (var hint in item.Core.Hints) + hint.Apply(_language, item); + }//foreach + }//method + + //Resolve to default actions + private void HandleUnresolvedConflicts() + { + foreach (var state in _data.States) + { + if (state.BuilderData.Conflicts.Count == 0) + continue; + var shiftReduceConflicts = state.BuilderData.GetShiftReduceConflicts(); + var reduceReduceConflicts = state.BuilderData.GetReduceReduceConflicts(); + var stateData = state.BuilderData; + if (shiftReduceConflicts.Count > 0) + _language.Errors.Add(GrammarErrorLevel.Conflict, state, Resources.ErrSRConflict, state, shiftReduceConflicts.ToString()); + if (reduceReduceConflicts.Count > 0) + _language.Errors.Add(GrammarErrorLevel.Conflict, state, Resources.ErrRRConflict, state, reduceReduceConflicts.ToString()); + //Create default actions for these conflicts. For shift-reduce, default action is shift, and shift action already + // exist for all shifts from the state, so we don't need to do anything, only report it + //For reduce-reduce create reduce actions for the first reduce item (whatever comes first in the set). 
+ foreach (var conflict in reduceReduceConflicts) + { + var reduceItems = stateData.ReduceItems.SelectByLookahead(conflict); + var firstProd = reduceItems.First().Core.Production; + var action = new ReduceParserAction(firstProd); + state.Actions[conflict] = action; + } + //stateData.Conflicts.Clear(); -- do not clear them, let the set keep the auto-resolved conflicts, may find more use for this later + } + } + + #endregion + + #region final actions: creating remaining reduce actions, computing expected terminals, cleaning up state data + + //Create reduce actions for states with a single reduce item (and no shifts) + private void CreateRemainingReduceActions() + { + foreach (var state in _data.States) + { + if (state.DefaultAction != null) continue; + var stateData = state.BuilderData; + if (stateData.ShiftItems.Count == 0 && stateData.ReduceItems.Count == 1) + { + state.DefaultAction = ReduceParserAction.Create(stateData.ReduceItems.First().Core.Production); + continue; //next state; if we have default reduce action, we don't need to fill actions dictionary for lookaheads + } + //create actions + foreach (var item in state.BuilderData.ReduceItems) + { + var action = ReduceParserAction.Create(item.Core.Production); + foreach (var lkh in item.Lookaheads) + { + if (state.Actions.ContainsKey(lkh)) continue; + state.Actions[lkh] = action; + } + }//foreach item + }//foreach state + } + + //Note that for states with a single reduce item the result is empty + private void ComputeStatesExpectedTerminals() + { + foreach (var state in _data.States) + { + state.ExpectedTerminals.UnionWith(state.BuilderData.ShiftTerminals); + //Add lookaheads from reduce items + foreach (var reduceItem in state.BuilderData.ReduceItems) + state.ExpectedTerminals.UnionWith(reduceItem.Lookaheads); + RemoveTerminals(state.ExpectedTerminals, _grammar.SyntaxError, _grammar.Eof); + }//foreach state + } + + private void RemoveTerminals(TerminalSet terms, params Terminal[] termsToRemove) + { + foreach 
(var termToRemove in termsToRemove) + if (terms.Contains(termToRemove)) terms.Remove(termToRemove); + } + + public void CleanupStateData() + { + foreach (var state in _data.States) + state.ClearData(); + } + + #endregion + + #region Utilities: ComputeLR0ItemSetKey + + //Parser states are distinguished by the subset of kernel LR0 items. + // So when we derive new LR0-item list by shift operation, + // we need to find out if we have already a state with the same LR0Item list. + // We do it by looking up in a state hash by a key - [LR0 item list key]. + // Each list's key is a concatenation of items' IDs separated by ','. + // Before producing the key for a list, the list must be sorted; + // thus we garantee one-to-one correspondence between LR0Item sets and keys. + // And of course, we count only kernel items (with dot NOT in the first position). + public static string ComputeLR0ItemSetKey(LR0ItemSet items) + { + if (items.Count == 0) return string.Empty; + //Copy non-initial items to separate list, and then sort it + LR0ItemList itemList = new LR0ItemList(); + foreach (var item in items) + itemList.Add(item); + //quick shortcut + if (itemList.Count == 1) + return itemList[0].ID.ToString(); + itemList.Sort(CompareLR0Items); //Sort by ID + //now build the key + var sb = new StringBuilder(100); + foreach (var item in itemList) + { + sb.Append(item.ID); + sb.Append(","); + }//foreach + return sb.ToString(); + } + + private static int CompareLR0Items(LR0Item x, LR0Item y) + { + if (x.ID < y.ID) return -1; + if (x.ID == y.ID) return 0; + return 1; + } + + #endregion + + #region comments + + // Computes set of expected terms in a parser state. While there may be extended list of symbols expected at some point, + // we want to reorganize and reduce it. For example, if the current state expects all arithmetic operators as an input, + // it would be better to not list all operators (+, -, *, /, etc) but simply put "operator" covering them all. 
+ // To achieve this, the grammar writer can group operators (or any other terminals) into named groups using Grammar's methods + // AddTermReportGroup, AddNoReportGroup etc. Then instead of reporting each operator separately, Irony would include + // a single "group name" to represent them all. + // The "expected report set" is not computed during parser construction (it would take considerable time), but on demand during parsing, + // when an error is detected and the expected set is actually needed for the error message. + // Multi-threading concerns. When used in a multi-threaded environment (web server), the LanguageData would be shared in + // an application-wide cache to avoid rebuilding the parser data on every request. The LanguageData is immutable, except for + // this one case - the expected sets are constructed late by CoreParser on an as-needed basis. + // We don't do any locking here, just compute the set and on return from this function the state field is assigned. + // We assume that this field assignment is an atomic, concurrency-safe operation. The worst thing that might happen + // is "double-effort" when two threads start computing the same set around the same time, and the last one to finish would + // leave its result in the state field. 
+ + #endregion + + internal static StringSet ComputeGroupedExpectedSetForState(Grammar grammar, ParserState state) + { + var terms = new TerminalSet(); + terms.UnionWith(state.ExpectedTerminals); + var result = new StringSet(); + //Eliminate no-report terminals + foreach (var group in grammar.TermReportGroups) + if (group.GroupType == TermReportGroupType.DoNotReport) + terms.ExceptWith(group.Terminals); + //Add normal and operator groups + foreach (var group in grammar.TermReportGroups) + if ((group.GroupType == TermReportGroupType.Normal || group.GroupType == TermReportGroupType.Operator) && + terms.Overlaps(group.Terminals)) + { + result.Add(group.Alias); + terms.ExceptWith(group.Terminals); + } + //Add remaining terminals "as is" + foreach (var terminal in terms) + result.Add(terminal.ErrorAlias); + return result; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Data/Construction/ParserDataBuilder_HelperClasses.cs b/src/Irony/Parsing/Data/Construction/ParserDataBuilder_HelperClasses.cs new file mode 100644 index 0000000..844df8c --- /dev/null +++ b/src/Irony/Parsing/Data/Construction/ParserDataBuilder_HelperClasses.cs @@ -0,0 +1,347 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Parsing.Parsers.ParserActions; + +//Helper data classes for ParserDataBuilder +// Note about using LRItemSet vs LRItemList. 
+// It appears that in many places the LRItemList would be a better (and faster) choice than LRItemSet. +// Many of the sets are actually lists and don't require hashset's functionality. +// But surprisingly, using LRItemSet proved to have much better performance (twice faster for lookbacks/lookaheads computation), so LRItemSet +// is used everywhere. +namespace Sanchime.Irony.Parsing.Data +{ + public class ParserStateData + { + public readonly ParserState State; + public readonly LRItemSet AllItems = new LRItemSet(); + public readonly LRItemSet ShiftItems = new LRItemSet(); + public readonly LRItemSet ReduceItems = new LRItemSet(); + public readonly LRItemSet InitialItems = new LRItemSet(); + public readonly BnfTermSet ShiftTerms = new BnfTermSet(); + public readonly TerminalSet ShiftTerminals = new TerminalSet(); + public readonly TerminalSet Conflicts = new TerminalSet(); + public readonly bool IsInadequate; + public LR0ItemSet AllCores = new LR0ItemSet(); + + //used for creating canonical states from core set + public ParserStateData(ParserState state, LR0ItemSet kernelCores) + { + State = state; + foreach (var core in kernelCores) + AddItem(core); + IsInadequate = ReduceItems.Count > 1 || ReduceItems.Count == 1 && ShiftItems.Count > 0; + } + + public void AddItem(LR0Item core) + { + //Check if a core had been already added. 
If yes, simply return + if (!AllCores.Add(core)) return; + //Create new item, add it to AllItems, InitialItems, ReduceItems or ShiftItems + var item = new LRItem(State, core); + AllItems.Add(item); + if (item.Core.IsFinal) + ReduceItems.Add(item); + else + ShiftItems.Add(item); + if (item.Core.IsInitial) + InitialItems.Add(item); + if (core.IsFinal) return; + //Add current term to ShiftTerms + if (!ShiftTerms.Add(core.Current)) return; + if (core.Current is Terminal) + ShiftTerminals.Add(core.Current as Terminal); + //If current term (core.Current) is a new non-terminal, expand it + var currNt = core.Current as NonTerminal; + if (currNt == null) return; + foreach (var prod in currNt.Productions) + AddItem(prod.LR0Items[0]); + }//method + + public TransitionTable Transitions + { + get + { + if (_transitions == null) + _transitions = new TransitionTable(); + return _transitions; + } + } + + private TransitionTable _transitions; + + //A set of states reachable through shifts over nullable non-terminals. 
Computed on demand + public ParserStateSet ReadStateSet + { + get + { + if (_readStateSet == null) + { + _readStateSet = new ParserStateSet(); + foreach (var shiftTerm in State.BuilderData.ShiftTerms) + if (shiftTerm.Flags.IsSet(TermFlags.IsNullable)) + { + var shift = State.Actions[shiftTerm] as ShiftParserAction; + var targetState = shift.NewState; + _readStateSet.Add(targetState); + _readStateSet.UnionWith(targetState.BuilderData.ReadStateSet); //we shouldn't get into loop here, the chain of reads is finite + } + }//if + return _readStateSet; + } + } + + private ParserStateSet _readStateSet; + + public ParserState GetNextState(BnfTerm shiftTerm) + { + var shift = ShiftItems.FirstOrDefault(item => item.Core.Current == shiftTerm); + if (shift == null) return null; + return shift.ShiftedItem.State; + } + + public TerminalSet GetShiftReduceConflicts() + { + var result = new TerminalSet(); + result.UnionWith(Conflicts); + result.IntersectWith(ShiftTerminals); + return result; + } + + public TerminalSet GetReduceReduceConflicts() + { + var result = new TerminalSet(); + result.UnionWith(Conflicts); + result.ExceptWith(ShiftTerminals); + return result; + } + }//class + + //An object representing inter-state transitions. 
Defines Includes, IncludedBy that are used for efficient lookahead computation + public class Transition + { + public readonly ParserState FromState; + public readonly ParserState ToState; + public readonly NonTerminal OverNonTerminal; + public readonly LRItemSet Items; + public readonly TransitionSet Includes = new TransitionSet(); + public readonly TransitionSet IncludedBy = new TransitionSet(); + private int _hashCode; + + public Transition(ParserState fromState, NonTerminal overNonTerminal) + { + FromState = fromState; + OverNonTerminal = overNonTerminal; + var shiftItem = fromState.BuilderData.ShiftItems.First(item => item.Core.Current == overNonTerminal); + ToState = FromState.BuilderData.GetNextState(overNonTerminal); + _hashCode = unchecked(FromState.GetHashCode() - overNonTerminal.GetHashCode()); + FromState.BuilderData.Transitions.Add(overNonTerminal, this); + Items = FromState.BuilderData.ShiftItems.SelectByCurrent(overNonTerminal); + foreach (var item in Items) + { + item.Transition = this; + } + }//constructor + + public void Include(Transition other) + { + if (other == this) return; + if (!IncludeTransition(other)) return; + //include children + foreach (var child in other.Includes) + { + IncludeTransition(child); + } + } + + private bool IncludeTransition(Transition other) + { + if (!Includes.Add(other)) return false; + other.IncludedBy.Add(this); + //propagate "up" + foreach (var incBy in IncludedBy) + incBy.IncludeTransition(other); + return true; + } + + public override string ToString() + { + return FromState.Name + " -> (over " + OverNonTerminal.Name + ") -> " + ToState.Name; + } + + public override int GetHashCode() + { + return _hashCode; + } + }//class + + public class TransitionSet : HashSet + { } + + public class TransitionList : List + { } + + public class TransitionTable : Dictionary + { } + + public class LRItem + { + public readonly ParserState State; + public readonly LR0Item Core; + + //these properties are used in lookahead 
computations + public LRItem ShiftedItem; + + public Transition Transition; + private int _hashCode; + + //Lookahead info for reduce items + public TransitionSet Lookbacks = new TransitionSet(); + + public TerminalSet Lookaheads = new TerminalSet(); + + public LRItem(ParserState state, LR0Item core) + { + State = state; + Core = core; + _hashCode = unchecked(state.GetHashCode() + core.GetHashCode()); + } + + public override string ToString() + { + return Core.ToString(); + } + + public override int GetHashCode() + { + return _hashCode; + } + + public TerminalSet GetLookaheadsInConflict() + { + var lkhc = new TerminalSet(); + lkhc.UnionWith(Lookaheads); + lkhc.IntersectWith(State.BuilderData.Conflicts); + return lkhc; + } + }//LRItem class + + public class LRItemList : List + { } + + public class LRItemSet : HashSet + { + public LRItem FindByCore(LR0Item core) + { + foreach (LRItem item in this) + if (item.Core == core) return item; + return null; + } + + public LRItemSet SelectByCurrent(BnfTerm current) + { + var result = new LRItemSet(); + foreach (var item in this) + if (item.Core.Current == current) + result.Add(item); + return result; + } + + public LR0ItemSet GetShiftedCores() + { + var result = new LR0ItemSet(); + foreach (var item in this) + if (item.Core.ShiftedItem != null) + result.Add(item.Core.ShiftedItem); + return result; + } + + public LRItemSet SelectByLookahead(Terminal lookahead) + { + var result = new LRItemSet(); + foreach (var item in this) + if (item.Lookaheads.Contains(lookahead)) + result.Add(item); + return result; + } + }//class + + public partial class LR0Item + { + public readonly Production Production; + public readonly int Position; + public readonly BnfTerm Current; + public bool TailIsNullable; + public GrammarHintList Hints = new GrammarHintList(); + + //automatically generated IDs - used for building keys for lists of kernel LR0Items + // which in turn are used to quickly lookup parser states in hash + internal readonly int ID; + + 
public LR0Item(int id, Production production, int position, GrammarHintList hints) + { + ID = id; + Production = production; + Position = position; + Current = Position < Production.RValues.Count ? Production.RValues[Position] : null; + if (hints != null) + Hints.AddRange(hints); + _hashCode = ID.ToString().GetHashCode(); + }//method + + public LR0Item ShiftedItem + { + get + { + if (Position >= Production.LR0Items.Count - 1) + return null; + else + return Production.LR0Items[Position + 1]; + } + } + + public bool IsKernel + { + get { return Position > 0; } + } + + public bool IsInitial + { + get { return Position == 0; } + } + + public bool IsFinal + { + get { return Position == Production.RValues.Count; } + } + + public override string ToString() + { + return Production.ProductionToString(Production, Position); + } + + public override int GetHashCode() + { + return _hashCode; + } + + private int _hashCode; + }//LR0Item + + public class LR0ItemList : List + { } + + public class LR0ItemSet : HashSet + { } +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Data/Construction/ScannerDataBuilder.cs b/src/Irony/Parsing/Data/Construction/ScannerDataBuilder.cs new file mode 100644 index 0000000..0f7d2e2 --- /dev/null +++ b/src/Irony/Parsing/Data/Construction/ScannerDataBuilder.cs @@ -0,0 +1,143 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Data +{ + internal class ScannerDataBuilder + { + private LanguageData _language; + private Grammar _grammar; + private GrammarData _grammarData; + private ScannerData _data; + + internal ScannerDataBuilder(LanguageData language) + { + _language = language; + _grammar = _language.Grammar; + _grammarData = language.GrammarData; + } + + internal void Build() + { + _data = _language.ScannerData; + InitMultilineTerminalsList(); + ProcessNonGrammarTerminals(); + BuildTerminalsLookupTable(); + } + + private void InitMultilineTerminalsList() + { + foreach (var terminal in _grammarData.Terminals) + { + if (terminal.Flags.IsSet(TermFlags.IsNonScanner)) continue; + if (terminal.Flags.IsSet(TermFlags.IsMultiline)) + { + _data.MultilineTerminals.Add(terminal); + terminal.MultilineIndex = (byte)_data.MultilineTerminals.Count; + } + } + } + + private void ProcessNonGrammarTerminals() + { + foreach (var term in _grammar.NonGrammarTerminals) + { + var firsts = term.GetFirsts(); + if (firsts == null || firsts.Count == 0) + { + _language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrTerminalHasEmptyPrefix, term.Name); + continue; + } + AddTerminalToLookup(_data.NonGrammarTerminalsLookup, term, firsts); + }//foreach term + + //sort each list + foreach (var list in _data.NonGrammarTerminalsLookup.Values) + { + if (list.Count > 1) + list.Sort(Terminal.ByPriorityReverse); + }//foreach list + } + + private void BuildTerminalsLookupTable() + { + foreach (Terminal term in _grammarData.Terminals) + { + //Non-grammar terminals are scanned in a separate step, before regular terminals; so we don't include them here + if (term.Flags.IsSet(TermFlags.IsNonScanner | TermFlags.IsNonGrammar)) continue; + var firsts = term.GetFirsts(); + if (firsts == null || firsts.Count == 0) + { + _grammarData.NoPrefixTerminals.Add(term); + continue; //foreach term + } 
+ AddTerminalToLookup(_data.TerminalsLookup, term, firsts); + }//foreach term + + if (_grammarData.NoPrefixTerminals.Count > 0) + { + //copy them to Scanner data + _data.NoPrefixTerminals.AddRange(_grammarData.NoPrefixTerminals); + // Sort in reverse priority order + _data.NoPrefixTerminals.Sort(Terminal.ByPriorityReverse); + //Now add Fallback terminals to every list, then sort lists by reverse priority + // so that terminal with higher priority comes first in the list + foreach (TerminalList list in _data.TerminalsLookup.Values) + foreach (var ft in _data.NoPrefixTerminals) + if (!list.Contains(ft)) + list.Add(ft); + }//if count > 0 + + //Finally sort every list in terminals lookup table + foreach (TerminalList list in _data.TerminalsLookup.Values) + if (list.Count > 1) + list.Sort(Terminal.ByPriorityReverse); + }//method + + private void AddTerminalToLookup(TerminalLookupTable _lookup, Terminal term, IList firsts) + { + foreach (string prefix in firsts) + { + if (string.IsNullOrEmpty(prefix)) + { + _language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrTerminalHasEmptyPrefix, term.Name); + continue; + } + //Calculate hash key for the prefix + char firstChar = prefix[0]; + if (_grammar.CaseSensitive) + AddTerminalToLookupByFirstChar(_lookup, term, firstChar); + else + { + AddTerminalToLookupByFirstChar(_lookup, term, char.ToLower(firstChar)); + AddTerminalToLookupByFirstChar(_lookup, term, char.ToUpper(firstChar)); + }//if + }//foreach prefix + } + + private void AddTerminalToLookupByFirstChar(TerminalLookupTable _lookup, Terminal term, char firstChar) + { + TerminalList currentList; + if (!_lookup.TryGetValue(firstChar, out currentList)) + { + //if list does not exist yet, create it + currentList = new TerminalList(); + _lookup[firstChar] = currentList; + } + //add terminal to the list + if (!currentList.Contains(term)) + currentList.Add(term); + } + }//class +}//namespace \ No newline at end of file diff --git 
a/src/Irony/Parsing/Data/Construction/_about_parser_construction.txt b/src/Irony/Parsing/Data/Construction/_about_parser_construction.txt new file mode 100644 index 0000000..e05fd07 --- /dev/null +++ b/src/Irony/Parsing/Data/Construction/_about_parser_construction.txt @@ -0,0 +1,45 @@ + About parser construction algorithm in general + We follow DeRemer-Pennello's algorithm, as it is described in Grune, Jacobs "Parsing Techniques" 2nd ed, section 9.7, p. 309. + There are a few differences: + 1. We compute lookbacks and transitions "on-demand" - only those that are actually needed for computing lookaheads in + reduce items in inadequate states. We start with reduce items in inadequate states - those are the only items that need lookaheads. + We then find all lookbacks (transitions) for these items. Then for each transition we find which ones need to "include" other parent + transitions - and compute this. And so on, until all transitions are created and linked through Include relationships. + 2. We propagate the Include relation between transitions immediately, when we add an include relation of one transition to another. See + Transition.Include method. Thus we avoid an extra step of "Transitive closure" of the Include relation. See note about efficiency below. + 3. We don't use the Reads and DirectReads relations between transitions. The "Reads" relation + between transitions is replaced by a Reads relation between states. So state A READS state B if you can move from state A to state B + using shifts over nullable non-terminals. ParserStateData.ReadStateSet contains all states that the current state Reads. ReadStateSet + is computed on-demand, and all reads are immediately propagated through the transitive chain - see source code of the property. + For the DirectReads set of a transition in DeRemer-Pennello, we use the ShiftTerminals set of the target state of the transition + - obviously this is the same set. 
+ + Note about immediate Include propagation + I think that the method with immediate Includes propagation is as efficient as it can be, and using Transitive Closure optimization + through Strongly-Connected Components (SCC) algorithm would not be any faster. With immediate propagation we attempt to add + a transition to the Includes set of another transition only once and stop propagation of the transition further down the chain if it is + already there. Essentially, we don't waste time propagating sets of transitions through chains of Includes if the transitions are + already there, propagated through a different route. This is what the SCC method is trying to mitigate - repeated propagation of transitions - + but this is not happening in our implementation. Maybe I'm mistaken, this is a guess, not a formal proof - let me know if you see + any flaws in my reasoning. + + About computing ExpectedTerminals set for parser states. + ExpectedTerminals is a field of ParserState and contains all Terminals that the parser expects in this state. This set is used by Scanner + to filter out terminals for the next token when it has a choice of more than one for a current input character. + (This is called the Scanner-Parser link facility). + The question now is how to compute this set. There are several kinds of Parser states: + 1. Containing shift items only. The ExpectedSet is a union of all "current" terminals of all shift items. State.BuilderData.ShiftTerminals + already contains this set - easy case. + 2. Containing shift AND reduce items. This is an inadequate state. The expected set is a union of all current terminals of shift items + (like in the previous case) plus all lookaheads of reduce items. Reduce items have lookaheads computed, because it is an inadequate state. + 3. Containing 2 or more reduce items - this is again an inadequate state, each reduce item has lookaheads computed, so the expected set + is a union of lookaheads of reduce items. + 4. Containing a single reduce item. 
This is a problem case. The state is not inadequate - we do not compute lookaheads for a single + reduce item, as there is no need for them - only a single action is possible. + The solution for the last case with a single reduce item is the following: we do not compute ExpectedSet for such states, but make sure + that the scanner-parser link is never activated in this case. We do it in Parser code by NOT reading the next token from Scanner when + the current state has a single reduce action (the state's DefaultAction field is not null). We do not read the next token because it is not needed + for finding an action - there is one single possible action anyway. As a result the Scanner would never start scanning a new token + when the parser is in this single-reduce state - and therefore the scanner would not invoke the parser-scanner link. + See Parser.ExecuteAction method for details. + diff --git a/src/Irony/Parsing/Data/GrammarData.cs b/src/Irony/Parsing/Data/GrammarData.cs new file mode 100644 index 0000000..64b042f --- /dev/null +++ b/src/Irony/Parsing/Data/GrammarData.cs @@ -0,0 +1,36 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Data +{ + //GrammarData is a container for all basic info about the grammar + // GrammarData is a field in LanguageData object. 
+ public class GrammarData + { + public readonly LanguageData Language; + public readonly Grammar Grammar; + public NonTerminal AugmentedRoot; + public NonTerminalSet AugmentedSnippetRoots = new NonTerminalSet(); + public readonly BnfTermSet AllTerms = new BnfTermSet(); + public readonly TerminalSet Terminals = new TerminalSet(); + public readonly NonTerminalSet NonTerminals = new NonTerminalSet(); + public TerminalSet NoPrefixTerminals = new TerminalSet(); //Terminals that have no limited set of prefixes + + public GrammarData(LanguageData language) + { + Language = language; + Grammar = language.Grammar; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Data/LanguageData.cs b/src/Irony/Parsing/Data/LanguageData.cs new file mode 100644 index 0000000..04c0c08 --- /dev/null +++ b/src/Irony/Parsing/Data/LanguageData.cs @@ -0,0 +1,48 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Data +{ + public partial class LanguageData + { + public readonly Grammar Grammar; + public readonly GrammarData GrammarData; + public readonly ParserData ParserData; + public readonly ScannerData ScannerData; + public readonly GrammarErrorList Errors = new GrammarErrorList(); + public GrammarErrorLevel ErrorLevel = GrammarErrorLevel.NoError; + public long ConstructionTime; + public bool AstDataVerified; + + public LanguageData(Grammar grammar) + { + Grammar = grammar; + GrammarData = new GrammarData(this); + ParserData = new ParserData(this); + ScannerData = new ScannerData(this); + ConstructAll(); + } + + public void ConstructAll() + { + var builder = new LanguageDataBuilder(this); + builder.Build(); + } + + public bool CanParse() + { + return ErrorLevel < GrammarErrorLevel.Error; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Data/ParserData.cs b/src/Irony/Parsing/Data/ParserData.cs new file mode 100644 index 0000000..9be7355 --- /dev/null +++ b/src/Irony/Parsing/Data/ParserData.cs @@ -0,0 +1,152 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +// ParserData is a container for all information used by CoreParser in input processing. +// ParserData is a field in LanguageData structure and is used by CoreParser when parsing intput. 
+// The state graph entry is InitialState state; the state graph encodes information usually contained +// in what is known in literature as transiton/goto tables. +// The graph is built from the language grammar by ParserDataBuilder. +using Sanchime.Irony.Parsing.Parsers.ParserActions; + +namespace Sanchime.Irony.Parsing.Data +{ + public class ParserData + { + public readonly LanguageData Language; + public ParserState InitialState; //main initial state + public ParserStateTable InitialStates = new ParserStateTable(); // Lookup table: AugmRoot => InitialState + public readonly ParserStateList States = new ParserStateList(); + public ParserAction ErrorAction; + + public ParserData(LanguageData language) + { + Language = language; + } + } + + public partial class ParserState + { + public readonly string Name; + public readonly ParserActionTable Actions = new ParserActionTable(); + + //Defined for states with a single reduce item; Parser.GetAction returns this action if it is not null. + public ParserAction DefaultAction; + + //Expected terms contains terminals is to be used in + //Parser-advise-to-Scanner facility would use it to filter current terminals when Scanner has more than one terminal for current char, + // it can ask Parser to filter the list using the ExpectedTerminals in current Parser state. + public readonly TerminalSet ExpectedTerminals = new TerminalSet(); + + //Used for error reporting, we would use it to include list of expected terms in error message + // It is reduced compared to ExpectedTerms - some terms are "merged" into other non-terminals (with non-empty DisplayName) + // to make message shorter and cleaner. 
It is computed on-demand in CoreParser + public StringSet ReportedExpectedSet; + + internal ParserStateData BuilderData; //transient, used only during automaton construction and may be cleared after that + + //Custom flags available for use by language/parser authors, to "mark" states in some way + // Irony reserves the highest order byte for internal use + public int CustomFlags; + + public ParserState(string name) + { + Name = name; + } + + public void ClearData() + { + BuilderData = null; + } + + public override string ToString() + { + return Name; + } + + public override int GetHashCode() + { + return Name.GetHashCode(); + } + + public bool CustomFlagIsSet(int flag) + { + return (CustomFlags & flag) != 0; + } + }//class + + public class ParserStateList : List + { } + + public class ParserStateSet : HashSet + { } + + public class ParserStateHash : Dictionary + { } + + public class ParserStateTable : Dictionary + { } + + [Flags] + public enum ProductionFlags + { + None = 0, + HasTerminals = 0x02, //contains terminal + IsError = 0x04, //contains Error terminal + IsEmpty = 0x08, + } + + public partial class Production + { + public ProductionFlags Flags; + public readonly NonTerminal LValue; // left-side element + public readonly BnfTermList RValues = new BnfTermList(); //the right-side elements sequence + internal readonly LR0ItemList LR0Items = new LR0ItemList(); //LR0 items based on this production + + public Production(NonTerminal lvalue) + { + LValue = lvalue; + }//constructor + + public string ToStringQuoted() + { + return "'" + ToString() + "'"; + } + + public override string ToString() + { + return ProductionToString(this, -1); //no dot + } + + public static string ProductionToString(Production production, int dotPosition) + { + char dotChar = '\u00B7'; //dot in the middle of the line + var bld = new StringBuilder(); + bld.Append(production.LValue.Name); + bld.Append(" -> "); + for (int i = 0; i < production.RValues.Count; i++) + { + if (i == dotPosition) + 
bld.Append(dotChar); + bld.Append(production.RValues[i].Name); + bld.Append(" "); + }//for i + if (dotPosition == production.RValues.Count) + bld.Append(dotChar); + return bld.ToString(); + } + }//Production class + + public class ProductionList : List + { } +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Data/ScannerData.cs b/src/Irony/Parsing/Data/ScannerData.cs new file mode 100644 index 0000000..24454a8 --- /dev/null +++ b/src/Irony/Parsing/Data/ScannerData.cs @@ -0,0 +1,36 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Data +{ + public class TerminalLookupTable : Dictionary + { } + + // ScannerData is a container for all detailed info needed by scanner to read input. 
+ public class ScannerData + { + public readonly LanguageData Language; + public readonly TerminalLookupTable TerminalsLookup = new TerminalLookupTable(); //hash table for fast terminal lookup by input char + public readonly TerminalList MultilineTerminals = new TerminalList(); + public TerminalList NoPrefixTerminals = new TerminalList(); //Terminals with no limited set of prefixes, copied from GrammarData + + //hash table for fast lookup of non-grammar terminals by input char + public readonly TerminalLookupTable NonGrammarTerminalsLookup = new TerminalLookupTable(); + + public ScannerData(LanguageData language) + { + Language = language; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/BnfExpression.cs b/src/Irony/Parsing/Grammars/BnfExpression.cs new file mode 100644 index 0000000..4bb2b7a --- /dev/null +++ b/src/Irony/Parsing/Grammars/BnfExpression.cs @@ -0,0 +1,86 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Grammars +{ + //BNF expressions are represented as OR-list of Plus-lists of BNF terms: + // every element is one alternative, stored as the sequence (BnfTermList) of its terms. + internal class BnfExpressionData : List<BnfTermList> + { + // Renders the data as text: terms of one sequence are joined with "+", alternatives with "|". + // Best effort: if rendering throws, the exception message is returned as "(error: ...)" instead of propagating. + public override string ToString() + { + try + { + var pipeArr = new string[Count]; + for (int i = 0; i < Count; i++) + { + BnfTermList seq = this[i]; + string[] seqArr = new string[seq.Count]; + for (int j = 0; j < seq.Count; j++) + seqArr[j] = seq[j].ToString(); + pipeArr[i] = string.Join("+", seqArr); + } + return string.Join("|", pipeArr); + } + catch (Exception e) + { + return "(error: " + e.Message + ")"; + } + } + } + + // A BNF term that wraps a BnfExpressionData; a new expression always starts with one empty sequence. + public class BnfExpression : BnfTerm + { + // Creates an expression whose single sequence contains only the given element. + public BnfExpression(BnfTerm element) : this() + { + Data[0].Add(element); + } + + public BnfExpression() : base(null) + { + Data = new BnfExpressionData + { + new BnfTermList() + }; + } + + internal BnfExpressionData Data; + + public override string ToString() + { + return Data.ToString(); + } + + #region Implicit conversion operators + + // Converts a string symbol through the current grammar's ToTerm and wraps the resulting term. + public static implicit operator BnfExpression(string symbol) + { + return new BnfExpression(Grammar.CurrentGrammar.ToTerm(symbol)); + } + + //It seems better to define one method instead of the following two, with parameter of type BnfTerm - + // but that's not possible - it would be a conversion from base type of BnfExpression itself, which + // is not allowed in c# + public static implicit operator BnfExpression(Terminal term) + { + return new BnfExpression(term); + } + + public static implicit operator BnfExpression(NonTerminal nonTerminal) + { + return new BnfExpression(nonTerminal); + } + + #endregion + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/BnfTerm.cs b/src/Irony/Parsing/Grammars/BnfTerm.cs new file mode 100644 index 0000000..fe23fa0 --- /dev/null +++ b/src/Irony/Parsing/Grammars/BnfTerm.cs @@ -0,0 +1,284 @@ +#region License + +/* 
********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Parsing.Grammars +{ + [Flags] + public enum TermFlags + { + None = 0, + IsOperator = 0x01, + IsOpenBrace = 0x02, + IsCloseBrace = 0x04, + IsBrace = IsOpenBrace | IsCloseBrace, + IsLiteral = 0x08, + + IsConstant = 0x10, + IsPunctuation = 0x20, + IsDelimiter = 0x40, + IsReservedWord = 0x080, + IsMemberSelect = 0x100, + InheritPrecedence = 0x200, // Signals that non-terminal must inherit precedence and assoc values from its children. + // Typically set for BinOp nonterminal (where BinOp.Rule = '+' | '-' | ...) + + IsNonScanner = 0x01000, // indicates that tokens for this terminal are NOT produced by scanner + IsNonGrammar = 0x02000, // if set, parser would eliminate the token from the input stream; terms in Grammar.NonGrammarTerminals have this flag set + IsTransient = 0x04000, // Transient non-terminal - should be replaced by it's child in the AST tree. 
+ IsNotReported = 0x08000, // Exclude from expected terminals list on syntax error + + //calculated flags + IsNullable = 0x010000, + + IsVisible = 0x020000, + IsKeyword = 0x040000, + IsMultiline = 0x100000, + + //internal flags + IsList = 0x200000, + + IsListContainer = 0x400000, + + //Indicates not to create AST node; mainly to suppress warning message on some special nodes that AST node type is not specified + //Automatically set by MarkTransient method + NoAstNode = 0x800000, + + //A flag to suppress automatic AST creation for child nodes in global AST construction. Will be used to supress full + // "compile" of method bodies in modules. The module might be large, but the running code might + // be actually using only a few methods or global members; so in this case it makes sense to "compile" only global/public + // declarations, including method headers but not full bodies. The body will be compiled on the first call. + // This makes even more sense when processing module imports. + AstDelayChildren = 0x1000000, + } + + //Basic Backus-Naur Form element. Base class for Terminal, NonTerminal, BnfExpression, GrammarHint + public abstract class BnfTerm + { + #region consructors + + public BnfTerm(string name) : this(name, name) + { + } + + public BnfTerm(string name, string errorAlias, Type nodeType) : this(name, errorAlias) + { + AstConfig.NodeType = nodeType; + } + + public BnfTerm(string name, string errorAlias, AstNodeCreator nodeCreator) : this(name, errorAlias) + { + AstConfig.NodeCreator = nodeCreator; + } + + public BnfTerm(string name, string errorAlias) + { + Name = name; + ErrorAlias = errorAlias; + _hashCode = _hashCounter++.GetHashCode(); + } + + #endregion + + #region virtuals and overrides + + public virtual void Init(GrammarData grammarData) + { + GrammarData = grammarData; + } + + public virtual string GetParseNodeCaption(ParseTreeNode node) + { + return GrammarData is not null ? 
GrammarData.Grammar.GetParseNodeCaption(node) : Name; + } + + public override string ToString() + { + return Name; + } + + //Hash code - we use static counter to generate hash codes + private static int _hashCounter; + + private readonly int _hashCode; + + public override int GetHashCode() + { + return _hashCode; + } + + #endregion + + public const int NoPrecedence = 0; + + #region properties: Name, DisplayName, Key, Options + + public string Name; + + //ErrorAlias is used in error reporting, e.g. "Syntax error, expected ". + public string ErrorAlias; + + public TermFlags Flags; + protected GrammarData GrammarData; + public int Precedence = NoPrecedence; + public Associativity Associativity = Associativity.Neutral; + + public Grammar Grammar + { + get { return GrammarData.Grammar; } + } + + public void SetFlag(TermFlags flag) + { + SetFlag(flag, true); + } + + public void SetFlag(TermFlags flag, bool value) + { + if (value) + Flags |= flag; + else + Flags &= ~flag; + } + + #endregion + + #region events: Shifting + + public event EventHandler Shifting; + + public event EventHandler AstNodeCreated; //an event fired after AST node is created. 
+ protected internal void OnShifting(ParsingEventArgs args) + { + Shifting?.Invoke(this, args); + } + + protected internal void OnAstNodeCreated(ParseTreeNode parseNode) + { + if (AstNodeCreated == null || parseNode.AstNode == null) return; + var args = new AstNodeEventArgs(parseNode); + AstNodeCreated(this, args); + } + + #endregion + + #region AST node creations: AstNodeType, AstNodeCreator, AstNodeCreated + + //We autocreate AST config on first GET; + public AstNodeConfig AstConfig + { + get + { + if (_astConfig == null) + _astConfig = new AstNodeConfig(); + return _astConfig; + } + set { _astConfig = value; } + } + + private AstNodeConfig _astConfig; + + public bool HasAstConfig() + { + return _astConfig is not null; + } + + #endregion + + #region Kleene operator Q() + + private NonTerminal _q; + + public BnfExpression Q() + { + if (_q is not null) + return _q; + _q = new NonTerminal(Name + "?") + { + Rule = this | Grammar.CurrentGrammar.Empty + }; + return _q; + } + + #endregion + + #region Operators: +, |, implicit + + public static BnfExpression operator +(BnfTerm term1, BnfTerm term2) + { + return Op_Plus(term1, term2); + } + + public static BnfExpression operator +(BnfTerm term1, string symbol2) + { + return Op_Plus(term1, Grammar.CurrentGrammar.ToTerm(symbol2)); + } + + public static BnfExpression operator +(string symbol1, BnfTerm term2) + { + return Op_Plus(Grammar.CurrentGrammar.ToTerm(symbol1), term2); + } + + //Alternative + public static BnfExpression operator |(BnfTerm term1, BnfTerm term2) + { + return Op_Pipe(term1, term2); + } + + public static BnfExpression operator |(BnfTerm term1, string symbol2) + { + return Op_Pipe(term1, Grammar.CurrentGrammar.ToTerm(symbol2)); + } + + public static BnfExpression operator |(string symbol1, BnfTerm term2) + { + return Op_Pipe(Grammar.CurrentGrammar.ToTerm(symbol1), term2); + } + + //BNF operations implementation ----------------------- + // Plus/sequence + internal static BnfExpression Op_Plus(BnfTerm 
term1, BnfTerm term2) + { + //Check term1 and see if we can use it as result, simply adding term2 as operand + BnfExpression expr1 = term1 as BnfExpression; + if (expr1 == null || expr1.Data.Count > 1) //either not expression at all, or Pipe-type expression (count > 1) + expr1 = new BnfExpression(term1); + expr1.Data[expr1.Data.Count - 1].Add(term2); + return expr1; + } + + //Pipe/Alternative + //New version proposed by the codeplex user bdaugherty + internal static BnfExpression Op_Pipe(BnfTerm term1, BnfTerm term2) + { + BnfExpression expr1 = term1 as BnfExpression; + if (expr1 == null) + expr1 = new BnfExpression(term1); + BnfExpression expr2 = term2 as BnfExpression; + if (expr2 == null) + expr2 = new BnfExpression(term2); + expr1.Data.AddRange(expr2.Data); + return expr1; + } + + #endregion + }//class + + // A sequence of BNF terms; Op_Plus appends BnfTerm operands to it. + public class BnfTermList : List<BnfTerm> + { } + + // A set of BNF terms. + public class BnfTermSet : HashSet<BnfTerm> + { } +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/Grammar.cs b/src/Irony/Parsing/Grammars/Grammar.cs new file mode 100644 index 0000000..187cc2f --- /dev/null +++ b/src/Irony/Parsing/Grammars/Grammar.cs @@ -0,0 +1,645 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; +using Sanchime.Irony.Parsing.Parsers.SpecialActionsHints; +using Sanchime.Irony.Parsing.TokenFilters; +using System.Globalization; + +namespace Sanchime.Irony.Parsing.Grammars +{ + public class Grammar + { + #region properties + + /// + /// Gets case sensitivity of the grammar. Read-only, true by default. + /// Can be set to false only through a parameter to grammar constructor. + /// + public readonly bool CaseSensitive; + + //List of chars that unambigously identify the start of new token. + //used in scanner error recovery, and in quick parse path in NumberLiterals, Identifiers + [Obsolete("Use IsWhitespaceOrDelimiter() method instead.")] + public string Delimiters = null; + + [Obsolete("Override Grammar.SkipWhitespace method instead.")] + public string WhitespaceChars = " \t\r\n\v"; + + public LanguageFlags LanguageFlags = LanguageFlags.Default; + + public TermReportGroupList TermReportGroups = new TermReportGroupList(); + + //Terminals not present in grammar expressions and not reachable from the Root + // (Comment terminal is usually one of them) + // Tokens produced by these terminals will be ignored by parser input. + public readonly TerminalSet NonGrammarTerminals = new TerminalSet(); + + /// + /// The main root entry for the grammar. + /// + public NonTerminal Root; + + /// + /// Alternative roots for parsing code snippets. 
+ /// + public NonTerminalSet SnippetRoots = new NonTerminalSet(); + + public string GrammarComments; //shown in Grammar info tab + + public CultureInfo DefaultCulture = CultureInfo.InvariantCulture; + + //Console-related properties, initialized in grammar constructor + public string ConsoleTitle; + + public string ConsoleGreeting; + public string ConsolePrompt; //default prompt + public string ConsolePromptMoreInput; //prompt to show when more input is expected + + #endregion + + #region constructors + + public Grammar() : this(true) + { + } //case sensitive by default + + public Grammar(bool caseSensitive) + { + _currentGrammar = this; + CaseSensitive = caseSensitive; + var stringComparer = caseSensitive ? StringComparer.Ordinal : StringComparer.OrdinalIgnoreCase; + + KeyTerms = new KeyTermTable(stringComparer); + //Initialize console attributes + ConsoleTitle = Resources.MsgDefaultConsoleTitle; + ConsoleGreeting = string.Format(Resources.MsgDefaultConsoleGreeting, GetType().Name); + ConsolePrompt = ">"; + ConsolePromptMoreInput = "."; + } + + #endregion + + #region Reserved words handling + + //Reserved words handling + public void MarkReservedWords(params string[] reservedWords) + { + foreach (string word in reservedWords) + { + var wdTerm = ToTerm(word); + wdTerm.SetFlag(TermFlags.IsReservedWord); + } + } + + #endregion + + #region Register/Mark methods + + public void RegisterOperators(int precedence, params string[] opSymbols) + { + RegisterOperators(precedence, Associativity.Left, opSymbols); + } + + public void RegisterOperators(int precedence, Associativity associativity, params string[] opSymbols) + { + foreach (string op in opSymbols) + { + KeyTerm opSymbol = ToTerm(op); + opSymbol.SetFlag(TermFlags.IsOperator); + opSymbol.Precedence = precedence; + opSymbol.Associativity = associativity; + } + }//method + + public void RegisterOperators(int precedence, params BnfTerm[] opTerms) + { + RegisterOperators(precedence, Associativity.Left, opTerms); + } + + 
public void RegisterOperators(int precedence, Associativity associativity, params BnfTerm[] opTerms) + { + foreach (var term in opTerms) + { + term.SetFlag(TermFlags.IsOperator); + term.Precedence = precedence; + term.Associativity = associativity; + } + } + + public void RegisterBracePair(string openBrace, string closeBrace) + { + var openS = ToTerm(openBrace); + var closeS = ToTerm(closeBrace); + openS.SetFlag(TermFlags.IsOpenBrace); + openS.IsPairFor = closeS; + closeS.SetFlag(TermFlags.IsCloseBrace); + closeS.IsPairFor = openS; + } + + public void MarkPunctuation(params string[] symbols) + { + foreach (string symbol in symbols) + { + KeyTerm term = ToTerm(symbol); + term.SetFlag(TermFlags.IsPunctuation | TermFlags.NoAstNode); + } + } + + public void MarkPunctuation(params BnfTerm[] terms) + { + foreach (BnfTerm term in terms) + term.SetFlag(TermFlags.IsPunctuation | TermFlags.NoAstNode); + } + + public void MarkTransient(params NonTerminal[] nonTerminals) + { + foreach (NonTerminal nt in nonTerminals) + nt.Flags |= TermFlags.IsTransient | TermFlags.NoAstNode; + } + + //MemberSelect are symbols invoking member list dropdowns in editor; for ex: . (dot), :: + public void MarkMemberSelect(params string[] symbols) + { + foreach (string symbol in symbols) + { + ToTerm(symbol).SetFlag(TermFlags.IsMemberSelect); + } + } + + //Sets IsNotReported flag on terminals. 
As a result the terminal wouldn't appear in expected terminal list + // in syntax error messages + public void MarkNotReported(params BnfTerm[] terms) + { + foreach (var term in terms) + { + term.SetFlag(TermFlags.IsNotReported); + } + } + + public void MarkNotReported(params string[] symbols) + { + foreach (var symbol in symbols) + { + ToTerm(symbol).SetFlag(TermFlags.IsNotReported); + } + } + + #endregion + + #region virtual methods: CreateTokenFilters, TryMatch + + public virtual void CreateTokenFilters(LanguageData language, TokenFilterList filters) + { + } + + //This method is called if Scanner fails to produce a token; it offers custom method a chance to produce the token + public virtual Token TryMatch(ParsingContext context, ISourceStream source) + { + return null; + } + + //Gives a way to customize parse tree nodes captions in the tree view. + // Returns, in order of precedence: the term name followed by " (Syntax error)" for error nodes; the token text for + // nodes that carry a token; "(State {name})" for a node without a term (empty string if it has no state either); + // the result of the non-terminal's caption template when one is set; otherwise the term name. + public virtual string GetParseNodeCaption(ParseTreeNode node) + { + if (node.IsError) + return node.Term.Name + " (Syntax error)"; + if (node.Token != null) + return node.Token.ToString(); + if (node.Term == null) //special case for initial node pushed into the stack at parser start + // show the state name only when a state is attached; node.State is never dereferenced while null + return node.State != null ? "(State " + node.State.Name + ")" : string.Empty; // Resources.LabelInitialState; + var ntTerm = node.Term as NonTerminal; + if (ntTerm != null && !string.IsNullOrEmpty(ntTerm.NodeCaptionTemplate)) + return ntTerm.GetNodeCaption(node); + return node.Term.Name; + } + + /// <summary> + /// Override this method to help scanner select a terminal to create token when there are more than one candidates + /// for an input char. context.CurrentTerminals contains candidate terminals; leave a single terminal in this list + /// as the one to use. + /// </summary> + public virtual void OnScannerSelectTerminal(ParsingContext context) + { } + + /// <summary>Skips whitespace characters in the input stream.</summary> + /// <remarks>Override this method if your language has non-standard whitespace characters.</remarks> + /// <param name="source">Source stream.</param> 
+ public virtual void SkipWhitespace(ISourceStream source) + { + while (!source.EOF()) + { + switch (source.PreviewChar) + { + case ' ': + case '\t': + break; + + case '\r': + case '\n': + case '\v': + if (UsesNewLine) return; //do not treat as whitespace if language is line-based + break; + + default: + return; + }//switch + source.PreviewPosition++; + }//while + }//method + + /// Returns true if a character is whitespace or delimiter. Used in quick-scanning versions of some terminals. + /// The character to check. + /// True if a character is whitespace or delimiter; otherwise, false. + /// Does not have to be completely accurate, should recognize most common characters that are special chars by themselves + /// and may never be part of other multi-character tokens. + public virtual bool IsWhitespaceOrDelimiter(char ch) + { + switch (ch) + { + case ' ': + case '\t': + case '\r': + case '\n': + case '\v': //whitespaces + case '(': + case ')': + case ',': + case ';': + case '[': + case ']': + case '{': + case '}': + case (char)0: //EOF + return true; + + default: + return false; + }//switch + }//method + + //The method is called after GrammarData is constructed + public virtual void OnGrammarDataConstructed(LanguageData language) + { + } + + public virtual void OnLanguageDataConstructed(LanguageData language) + { + } + + //Constructs the error message in situation when parser has no available action for current input. 
+ // override this method if you want to change this message + public virtual string ConstructParserErrorMessage(ParsingContext context, StringSet expectedTerms) + { + return expectedTerms.Count switch + { + > 0 => string.Format(Resources.ErrSyntaxErrorExpected, expectedTerms.ToString(", ")), + _ => Resources.ErrParserUnexpectedInput + }; + } + + // Override this method to perform custom error processing. + // Chooses the message from the term of the current parser input: + // - SyntaxError: the token value supplied with the error token (scanner error); + // - Indent: Resources.ErrUnexpIndent; + // - Eof while context.OpenBraces is not empty: Resources.ErrNoClosingBrace for the brace returned by OpenBraces.Peek(); + // - anything else, including Eof with no open braces: the message built by ConstructParserErrorMessage. + // The chosen message is then passed to context.AddParserError. + public virtual void ReportParseError(ParsingContext context) + { + string error = null; + if (context.CurrentParserInput.Term == SyntaxError) + error = context.CurrentParserInput.Token.Value as string; //scanner error + else if (context.CurrentParserInput.Term == Indent) + error = Resources.ErrUnexpIndent; + else if (context.CurrentParserInput.Term == Eof && context.OpenBraces.Count > 0) + { + //report unclosed braces/parenthesis; the condition above already guarantees at least one open brace, + // so no second Count check (and no ErrUnexpEof fallback, which could never run) is needed here + var openBrace = context.OpenBraces.Peek(); + error = string.Format(Resources.ErrNoClosingBrace, openBrace.Text); + } + else + { + var expectedTerms = context.GetExpectedTermSet(); + error = ConstructParserErrorMessage(context, expectedTerms); + } + context.AddParserError(error); + }//method + + #endregion + + #region MakePlusRule, MakeStarRule methods + + public BnfExpression MakePlusRule(NonTerminal listNonTerminal, BnfTerm listMember) + { + return MakeListRule(listNonTerminal, null, listMember); + } + + public BnfExpression MakePlusRule(NonTerminal listNonTerminal, BnfTerm delimiter, BnfTerm listMember) + { + return MakeListRule(listNonTerminal, delimiter, listMember); + } + + public BnfExpression MakeStarRule(NonTerminal listNonTerminal, BnfTerm listMember) + { + return MakeListRule(listNonTerminal, null, listMember, TermListOptions.StarList); + } + + public BnfExpression MakeStarRule(NonTerminal listNonTerminal, BnfTerm delimiter, BnfTerm listMember) + { + return MakeListRule(listNonTerminal, delimiter, listMember, TermListOptions.StarList); + } + + 
protected BnfExpression MakeListRule(NonTerminal list, BnfTerm delimiter, BnfTerm listMember, TermListOptions options = TermListOptions.PlusList) + { + //If it is a star-list (allows empty), then we first build plus-list + var isPlusList = !options.IsSet(TermListOptions.AllowEmpty); + var allowTrailingDelim = options.IsSet(TermListOptions.AllowTrailingDelimiter) && delimiter != null; + //"plusList" is the list for which we will construct expression - it is either extra plus-list or original list. + // In the former case (extra plus-list) we will use it later to construct expression for list + NonTerminal plusList = isPlusList ? list : new NonTerminal(listMember.Name + "+"); + plusList.SetFlag(TermFlags.IsList); + plusList.Rule = plusList; // rule => list + if (delimiter != null) + plusList.Rule += delimiter; // rule => list + delim + if (options.IsSet(TermListOptions.AddPreferShiftHint)) + plusList.Rule += PreferShiftHere(); // rule => list + delim + PreferShiftHere() + plusList.Rule += listMember; // rule => list + delim + PreferShiftHere() + elem + plusList.Rule |= listMember; // rule => list + delim + PreferShiftHere() + elem | elem + if (isPlusList) + { + // if we build plus list - we're almost done; plusList == list + // add trailing delimiter if necessary; for star list we'll add it to final expression + if (allowTrailingDelim) + plusList.Rule |= list + delimiter; // rule => list + delim + PreferShiftHere() + elem | elem | list + delim + } + else + { + // Setup list.Rule using plus-list we just created + list.Rule = Empty | plusList; + if (allowTrailingDelim) + list.Rule |= plusList + delimiter | delimiter; + plusList.SetFlag(TermFlags.NoAstNode); + list.SetFlag(TermFlags.IsListContainer); //indicates that real list is one level lower + } + return list.Rule; + }//method + + #endregion + + #region Hint utilities + + protected GrammarHint PreferShiftHere() + { + return new PreferredActionHint(PreferredActionType.Shift); + } + + protected GrammarHint 
ReduceHere() + { + return new PreferredActionHint(PreferredActionType.Reduce); + } + + protected TokenPreviewHint ReduceIf(string thisSymbol, params string[] comesBefore) + { + return new TokenPreviewHint(PreferredActionType.Reduce, thisSymbol, comesBefore); + } + + protected TokenPreviewHint ReduceIf(Terminal thisSymbol, params Terminal[] comesBefore) + { + return new TokenPreviewHint(PreferredActionType.Reduce, thisSymbol, comesBefore); + } + + protected TokenPreviewHint ShiftIf(string thisSymbol, params string[] comesBefore) + { + return new TokenPreviewHint(PreferredActionType.Shift, thisSymbol, comesBefore); + } + + protected TokenPreviewHint ShiftIf(Terminal thisSymbol, params Terminal[] comesBefore) + { + return new TokenPreviewHint(PreferredActionType.Shift, thisSymbol, comesBefore); + } + + protected GrammarHint ImplyPrecedenceHere(int precedence) + { + return ImplyPrecedenceHere(precedence, Associativity.Left); + } + + protected GrammarHint ImplyPrecedenceHere(int precedence, Associativity associativity) + { + return new ImpliedPrecedenceHint(precedence, associativity); + } + + protected CustomActionHint CustomActionHere(ExecuteActionMethod executeMethod, PreviewActionMethod previewMethod = null) + { + return new CustomActionHint(executeMethod, previewMethod); + } + + #endregion + + #region Term report group methods + + /// + /// Creates a terminal reporting group, so all terminals in the group will be reported as a single "alias" in syntex error messages like + /// "Syntax error, expected: [list of terms]" + /// + /// An alias for all terminals in the group. + /// Symbols to be included into the group. 
+ protected void AddTermsReportGroup(string alias, params string[] symbols) + { + TermReportGroups.Add(new TermReportGroup(alias, TermReportGroupType.Normal, SymbolsToTerms(symbols))); + } + + /// + /// Creates a terminal reporting group, so all terminals in the group will be reported as a single "alias" in syntex error messages like + /// "Syntax error, expected: [list of terms]" + /// + /// An alias for all terminals in the group. + /// Terminals to be included into the group. + protected void AddTermsReportGroup(string alias, params Terminal[] terminals) + { + TermReportGroups.Add(new TermReportGroup(alias, TermReportGroupType.Normal, terminals)); + } + + /// + /// Adds symbols to a group with no-report type, so symbols will not be shown in expected lists in syntax error messages. + /// + /// Symbols to exclude. + protected void AddToNoReportGroup(params string[] symbols) + { + TermReportGroups.Add(new TermReportGroup(string.Empty, TermReportGroupType.DoNotReport, SymbolsToTerms(symbols))); + } + + /// + /// Adds symbols to a group with no-report type, so symbols will not be shown in expected lists in syntax error messages. + /// + /// Symbols to exclude. + protected void AddToNoReportGroup(params Terminal[] terminals) + { + TermReportGroups.Add(new TermReportGroup(string.Empty, TermReportGroupType.DoNotReport, terminals)); + } + + /// + /// Adds a group and an alias for all operator symbols used in the grammar. + /// + /// An alias for operator symbols. 
+ protected void AddOperatorReportGroup(string alias) + { + TermReportGroups.Add(new TermReportGroup(alias, TermReportGroupType.Operator, null)); //operators will be filled later + } + + // Converts each symbol to its key term through ToTerm and returns the terms in the same order. + private IEnumerable<Terminal> SymbolsToTerms(IEnumerable<string> symbols) + { + var termList = new TerminalList(); + foreach (var symbol in symbols) + termList.Add(ToTerm(symbol)); + return termList; + } + + #endregion + + #region Standard terminals: EOF, Empty, NewLine, Indent, Dedent + + // Empty object is used to identify optional element: + // term.Rule = term1 | Empty; + public readonly Terminal Empty = new("EMPTY"); + + public readonly NewLineTerminal NewLine = new("LF"); + + //set to true automatically by NewLine terminal; prevents treating new-line characters as whitespaces + public bool UsesNewLine; + + // The following terminals are used in indent-sensitive languages like Python; + // they are not produced by scanner but are produced by CodeOutlineFilter after scanning + public readonly Terminal Indent = new("INDENT", TokenCategory.Outline, TermFlags.IsNonScanner); + + public readonly Terminal Dedent = new("DEDENT", TokenCategory.Outline, TermFlags.IsNonScanner); + + //End-of-Statement terminal - used in indentation-sensitive language to signal end-of-statement; + // it is not always synced with CRLF chars, and CodeOutlineFilter carefully produces Eos tokens + // (as well as Indent and Dedent) based on line/col information in incoming content tokens. + public readonly Terminal Eos = new("EOS", Resources.LabelEosLabel, TokenCategory.Outline, TermFlags.IsNonScanner); + + // Identifies end of file + // Note: using Eof in grammar rules is optional. Parser automatically adds this symbol + // as a lookahead to Root non-terminal + public readonly Terminal Eof = new("EOF", TokenCategory.Outline); + + //Artificial terminal to use for injected/replaced tokens that must be ignored by parser. 
public readonly Terminal Skip = new("(SKIP)", TokenCategory.Outline, TermFlags.IsNonGrammar);

// Used as a "line-start" indicator
public readonly Terminal LineStartTerminal = new("LINE_START", TokenCategory.Outline);

// Used for error tokens
public readonly Terminal SyntaxError = new("SYNTAX_ERROR", TokenCategory.Error, TermFlags.IsNonScanner);

/// <summary>
/// Non-terminal named "LF+", created on first access and cached afterwards.
/// Its rule is <c>NewLine | LF+ PreferShiftHere NewLine</c>.
/// </summary>
public NonTerminal NewLinePlus
{
    get
    {
        if (_newLinePlus != null)
        {
            return _newLinePlus;
        }

        var lfPlus = new NonTerminal("LF+");
        _newLinePlus = lfPlus;
        // We do not use the MakePlusRule method; the rule is specified explicitly in order to add
        // the PreferShiftHere call - this solves some unintended shift-reduce conflicts
        // when using NewLinePlus.
        lfPlus.Rule = NewLine | lfPlus + PreferShiftHere() + NewLine;
        MarkPunctuation(lfPlus);
        lfPlus.SetFlag(TermFlags.IsList);
        return lfPlus;
    }
}

private NonTerminal _newLinePlus;

/// <summary>
/// Non-terminal named "LF*", created on first access and cached afterwards.
/// Its rule is produced by <c>MakeStarRule</c> over <see cref="NewLine"/>.
/// </summary>
public NonTerminal NewLineStar
{
    get
    {
        if (_newLineStar != null)
        {
            return _newLineStar;
        }

        var lfStar = new NonTerminal("LF*");
        _newLineStar = lfStar;
        MarkPunctuation(lfStar);
        lfStar.Rule = MakeStarRule(lfStar, NewLine);
        return lfStar;
    }
}

private NonTerminal _newLineStar;

#endregion

#region KeyTerms (keywords + special symbols)

public KeyTermTable KeyTerms;

/// <summary>Returns the key term for <paramref name="text"/>, using the text itself as the term name.</summary>
/// <param name="text">Keyword or special symbol text.</param>
/// <returns>The existing or newly registered key term.</returns>
public KeyTerm ToTerm(string text) => ToTerm(text, text);

/// <summary>
/// Looks up <paramref name="text"/> in <see cref="KeyTerms"/>; registers a new key term when none is found.
/// </summary>
/// <param name="text">Keyword or special symbol text.</param>
/// <param name="name">Name for the term; applied to an existing term only if that term has no name yet.</param>
/// <returns>The existing or newly registered key term.</returns>
public KeyTerm ToTerm(string text, string name)
{
    if (!KeyTerms.TryGetValue(text, out KeyTerm found))
    {
        // Nothing registered yet: create the term, lower-casing the key for case-insensitive grammars.
        var key = CaseSensitive ? text : text.ToLowerInvariant();
        var created = new KeyTerm(key, name);
        KeyTerms[key] = created;
        return created;
    }

    // Update name if it was specified now and not before.
    var needsName = string.IsNullOrEmpty(found.Name) && !string.IsNullOrEmpty(name);
    if (needsName)
    {
        found.Name = name;
    }
    return found;
}

#endregion

#region CurrentGrammar static field

//Static per-thread instance; Grammar constructor sets it to self (this).
+ // This field/property is used by operator overloads (which are static) to access Grammar's predefined terminals like Empty, + // and SymbolTerms dictionary to convert string literals to symbol terminals and add them to the SymbolTerms dictionary + [ThreadStatic] + private static Grammar _currentGrammar; + + public static Grammar CurrentGrammar + { + get { return _currentGrammar; } + } + + internal static void ClearCurrentGrammar() + { + _currentGrammar = null; + } + + #endregion + + #region AST construction + + public virtual void BuildAst(LanguageData language, ParseTree parseTree) + { + if (!LanguageFlags.IsSet(LanguageFlags.CreateAst)) + return; + var astContext = new AstContext(language); + var astBuilder = new AstBuilder(astContext); + astBuilder.BuildAst(parseTree); + } + + #endregion + } +} \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/GrammarEnums.cs b/src/Irony/Parsing/Grammars/GrammarEnums.cs new file mode 100644 index 0000000..2ba3700 --- /dev/null +++ b/src/Irony/Parsing/Grammars/GrammarEnums.cs @@ -0,0 +1,57 @@ +namespace Sanchime.Irony.Parsing.Grammars +{ + [Flags] + public enum LanguageFlags + { + None = 0, + + //Compilation options + //Be careful - use this flag ONLY if you use NewLine terminal in grammar explicitly! + // - it happens only in line-based languages like Basic. 
+ NewLineBeforeEOF = 0x01, + + //Emit LineStart token + EmitLineStartToken = 0x02, + + DisableScannerParserLink = 0x04, //in grammars that define TokenFilters (like Python) this flag should be set + CreateAst = 0x08, //create AST nodes + + //Runtime + SupportsCommandLine = 0x0200, + + TailRecursive = 0x0400, //Tail-recursive language - Scheme is one example + SupportsBigInt = 0x01000, + SupportsComplex = 0x02000, + SupportsRational = 0x04000, + + //Default value + Default = None, + } + + //Operator associativity types + public enum Associativity + { + Left, + Right, + Neutral //honestly don't know what that means, but it is mentioned in literature + } + + //Used by Make-list-rule methods + [Flags] + public enum TermListOptions + { + None = 0, + AllowEmpty = 0x01, + AllowTrailingDelimiter = 0x02, + + // In some cases this hint would help to resolve the conflicts that come up when you have two lists separated by a nullable term. + // This hint would resolve the conflict, telling the parser to include as many as possible elements in the first list, and the rest, + // if any, would go to the second list. By default, this flag is included in Star and Plus lists. + AddPreferShiftHint = 0x04, + + //Combinations - use these + PlusList = AddPreferShiftHint, + + StarList = AllowEmpty | AddPreferShiftHint, + } +} \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/GrammarError.cs b/src/Irony/Parsing/Grammars/GrammarError.cs new file mode 100644 index 0000000..2342368 --- /dev/null +++ b/src/Irony/Parsing/Grammars/GrammarError.cs @@ -0,0 +1,83 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. 
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Grammars +{ + public enum GrammarErrorLevel + { + NoError, //used only for max error level when there are no errors + Info, + Warning, + Conflict, //shift-reduce or reduce-reduce conflict + Error, //severe grammar error, parser construction cannot continue + InternalError, //internal Irony error + } + + public class GrammarError + { + public readonly GrammarErrorLevel Level; + public readonly string Message; + public readonly ParserState State; //can be null! + + public GrammarError(GrammarErrorLevel level, ParserState state, string message) + { + Level = level; + State = state; + Message = message; + } + + public override string ToString() + { + return Message + " (" + State + ")"; + } + }//class + + public class GrammarErrorList : List + { + public void Add(GrammarErrorLevel level, ParserState state, string message, params object[] args) + { + if (args != null && args.Length > 0) + message = string.Format(message, args); + Add(new GrammarError(level, state, message)); + } + + public void AddAndThrow(GrammarErrorLevel level, ParserState state, string message, params object[] args) + { + Add(level, state, message, args); + var error = this[Count - 1]; + var exc = new GrammarErrorException(error.Message, error); + throw exc; + } + + public GrammarErrorLevel GetMaxLevel() + { + var max = GrammarErrorLevel.NoError; + foreach (var err in this) + if (max < err.Level) + max = err.Level; + return max; + } + } + + //Used to cancel parser construction when fatal error is found + public class GrammarErrorException : Exception + { + public readonly GrammarError Error; + + public GrammarErrorException(string message, GrammarError error) : base(message) + { + Error = error; + } 
+ }//class +} \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/GrammarHint.cs b/src/Irony/Parsing/Grammars/GrammarHint.cs new file mode 100644 index 0000000..b0c9d3a --- /dev/null +++ b/src/Irony/Parsing/Grammars/GrammarHint.cs @@ -0,0 +1,47 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Grammars +{ + public class GrammarHintList : List + { } + + //Hints are additional instructions for parser added inside BNF expressions. + // Hint refers to specific position inside the expression (production), so hints are associated with LR0Item object + // One example is a PreferredActionHint produced by the Grammar.PreferShiftHere() method. It tells parser to perform + // shift in case of a shift/reduce conflict. It is in fact the default action of LALR parser, so the hint simply suppresses the error + // message about the shift/reduce conflict in the grammar. + public abstract class GrammarHint : BnfTerm + { + public GrammarHint() : base("hint") + { + } + + /// Gives a chance to a custom code in hint to interfere in parser automaton construction. + /// The LanguageData instance. + /// The LRItem that "owns" the hint. + /// + /// The most common purpose of this method (it's overrides) is to resolve the conflicts + /// by adding specific actions into State.Actions dictionary. + /// The owner parameter represents the position in the grammar expression where the hint + /// is found. 
The parser state is available through owner.State property. + /// + public virtual void Apply(LanguageData language, LRItem owner) + { + // owner.State -- the parser state + // owner.State.BuilderData.Conflicts -- as set of conflict terminals + // owner.State.Actions -- a dictionary of actions in the current state. + } + } //class +} \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/ICanRunSample.cs b/src/Irony/Parsing/Grammars/ICanRunSample.cs new file mode 100644 index 0000000..28eb5c3 --- /dev/null +++ b/src/Irony/Parsing/Grammars/ICanRunSample.cs @@ -0,0 +1,22 @@ +namespace Sanchime.Irony.Parsing.Grammars +{ + // Should be implemented by Grammar class to be able to run samples in Grammar Explorer. + public interface ICanRunSample + { + string RunSample(RunSampleArgs args); + } + + public class RunSampleArgs + { + public LanguageData Language; + public string Sample; + public ParseTree ParsedSample; + + public RunSampleArgs(LanguageData language, string sample, ParseTree parsedSample) + { + Language = language; + Sample = sample; + ParsedSample = parsedSample; + } + } +} \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/LanguageAttribute.cs b/src/Irony/Parsing/Grammars/LanguageAttribute.cs new file mode 100644 index 0000000..cfac879 --- /dev/null +++ b/src/Irony/Parsing/Grammars/LanguageAttribute.cs @@ -0,0 +1,69 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using System.Reflection; + +namespace Sanchime.Irony.Parsing.Grammars +{ + [AttributeUsage(AttributeTargets.Class)] + public class LanguageAttribute : Attribute + { + public LanguageAttribute() : this(null) + { + } + + public LanguageAttribute(string languageName) : this(languageName, "1.0", string.Empty) + { + } + + public LanguageAttribute(string languageName, string version, string description) + { + _languageName = languageName; + _version = version; + _description = description; + } + + public string LanguageName + { + get { return _languageName; } + } + + private readonly string _languageName; + + public string Version + { + get { return _version; } + } + + private string _version; + + public string Description + { + get { return _description; } + } + + private string _description; + + public static LanguageAttribute GetValue(Type grammarClass) + { + var attrs = grammarClass.GetTypeInfo().GetCustomAttributes(typeof(LanguageAttribute), true); + if (attrs != null && attrs.Count() > 0) + { + LanguageAttribute la = attrs.FirstOrDefault() as LanguageAttribute; + return la; + } + return null; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/NonTerminal.cs b/src/Irony/Parsing/Grammars/NonTerminal.cs new file mode 100644 index 0000000..b21bcf1 --- /dev/null +++ b/src/Irony/Parsing/Grammars/NonTerminal.cs @@ -0,0 +1,183 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Ast; + +namespace Sanchime.Irony.Parsing.Grammars +{ + internal class IntList : List + { } + + public partial class NonTerminal : BnfTerm + { + #region + + public NonTerminal(string name) : base(name, null) + { + } //by default display name is null + + public NonTerminal(string name, string errorAlias) : base(name, errorAlias) + { + } + + public NonTerminal(string name, string errorAlias, Type nodeType) : base(name, errorAlias, nodeType) + { + } + + public NonTerminal(string name, string errorAlias, AstNodeCreator nodeCreator) : base(name, errorAlias, nodeCreator) + { + } + + public NonTerminal(string name, Type nodeType) : base(name, null, nodeType) + { + } + + public NonTerminal(string name, AstNodeCreator nodeCreator) : base(name, null, nodeCreator) + { + } + + public NonTerminal(string name, BnfExpression expression) + : this(name) + { + Rule = expression; + } + + #endregion + + #region properties/fields: Rule, ErrorRule + + public BnfExpression Rule; + + //Separate property for specifying error expressions. This allows putting all such expressions in a separate section + // in grammar for all non-terminals. However you can still put error expressions in the main Rule property, just like + // in YACC + public BnfExpression ErrorRule; + + //A template for representing ParseTreeNode in the parse tree. 
Can contain '#{i}' fragments referencing + // child nodes by index + public string NodeCaptionTemplate; + + //Converted template with index list + private string _convertedTemplate; + + private IntList _captionParameters; + + // Productions are used internally by Parser builder + internal ProductionList Productions = new(); + + #endregion + + #region Events: Reduced + + //Note that Reduced event may be called more than once for a List node + public event EventHandler Reduced; + + internal void OnReduced(ParsingContext context, Production reducedProduction, ParseTreeNode resultNode) + { + Reduced?.Invoke(this, new ReducedEventArgs(context, reducedProduction, resultNode)); + } + + #endregion + + #region overrides: ToString, Init + + public override string ToString() + { + return Name; + } + + public override void Init(GrammarData grammarData) + { + base.Init(grammarData); + if (!string.IsNullOrEmpty(NodeCaptionTemplate)) + ConvertNodeCaptionTemplate(); + } + + #endregion + + // Contributed by Alexey Yakovlev (yallie) + + #region Grammar hints + + // Adds a hint at the end of all productions + public void AddHintToAll(GrammarHint hint) + { + if (Rule == null) + throw new Exception("Rule property must be set on non-terminal before calling AddHintToAll."); + foreach (var plusList in Rule.Data) + plusList.Add(hint); + } + + #endregion + + #region NodeCaptionTemplate utilities + + //We replace original tag '#{i}' (where i is the index of the child node to put here) + // with the tag '{k}', where k is the number of the parameter. 
So after conversion the template can + // be used in string.Format() call, with parameters set to child nodes captions + private void ConvertNodeCaptionTemplate() + { + _captionParameters = new IntList(); + _convertedTemplate = NodeCaptionTemplate; + var index = 0; + while (index < 100) + { + var strParam = "#{" + index + "}"; + if (_convertedTemplate.Contains(strParam)) + { + _convertedTemplate = _convertedTemplate.Replace(strParam, "{" + _captionParameters.Count + "}"); + _captionParameters.Add(index); + } + if (!_convertedTemplate.Contains("#{")) return; + index++; + }//while + }//method + + public string GetNodeCaption(ParseTreeNode node) + { + var paramValues = new string[_captionParameters.Count]; + for (int i = 0; i < _captionParameters.Count; i++) + { + var childIndex = _captionParameters[i]; + if (childIndex < node.ChildNodes.Count) + { + var child = node.ChildNodes[childIndex]; + //if child is a token, then child.ToString returns token.ToString which contains Value + Term; + // in this case we prefer to have Value only + paramValues[i] = child.Token != null ? 
child.Token.ValueString : child.ToString(); + } + } + var result = string.Format(_convertedTemplate, paramValues); + return result; + } + + #endregion + }//class + + public class NonTerminalList : List + { + public override string ToString() + { + return string.Join(" ", this); + } + } + + public class NonTerminalSet : HashSet + { + public override string ToString() + { + return string.Join(" ", this); + } + } +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Grammars/TermReportGroups.cs b/src/Irony/Parsing/Grammars/TermReportGroups.cs new file mode 100644 index 0000000..2eeff18 --- /dev/null +++ b/src/Irony/Parsing/Grammars/TermReportGroups.cs @@ -0,0 +1,50 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Grammars +{ + //Terminal report group is a facility for improving syntax error messages. + // Irony parser/scanner reports an error like "Syntax error, invalid character. Expected: ." + // The is a list of all terminals (symbols) that are expected in current position. + // This list might quite long and quite difficult to look through. The solution is to provide Group names for + // groups of terminals - these are groups of type Normal. + // Some terminals might be excluded from showing in expected list by including them into group of type DoNotReport. 
+ // Finally, Operator group allows you to specify group name for all operator symbols without listing operators - + // Irony will collect all operator symbols registered with RegisterOperator method automatically. + + public enum TermReportGroupType + { + Normal, + DoNotReport, + Operator + } + + public class TermReportGroup + { + public string Alias; + public TermReportGroupType GroupType; + public TerminalSet Terminals = new(); + + public TermReportGroup(string alias, TermReportGroupType groupType, IEnumerable terminals) + { + Alias = alias; + GroupType = groupType; + if (terminals != null) + Terminals.UnionWith(terminals); + } + }//class + + public class TermReportGroupList : List + { } +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Parsers/ParseTree.cs b/src/Irony/Parsing/Parsers/ParseTree.cs new file mode 100644 index 0000000..97466f1 --- /dev/null +++ b/src/Irony/Parsing/Parsers/ParseTree.cs @@ -0,0 +1,157 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Parsers +{ + /* + A node for a parse tree (concrete syntax tree) - an initial syntax representation produced by parser. + It contains all syntax elements of the input text, each element represented by a generic node ParseTreeNode. + The parse tree is converted into abstract syntax tree (AST) which contains custom nodes. 
The conversion might + happen on-the-fly: as parser creates the parse tree nodes it can create the AST nodes and puts them into AstNode field. + Alternatively it might happen as a separate step, after completing the parse tree. + AST node might optinally implement IAstNodeInit interface, so Irony parser can initialize the node providing it + with all relevant information. + The ParseTreeNode also works as a stack element in the parser stack, so it has the State property to carry + the pushed parser state while it is in the stack. + */ + + public class ParseTreeNode + { + public object AstNode; + public Token Token; + public BnfTerm Term; + public int Precedence; + public Associativity Associativity; + public SourceSpan Span; + + //Making ChildNodes property (not field) following request by Matt K, Bill H + public ParseTreeNodeList ChildNodes { get; private set; } + + public bool IsError; + internal ParserState State; //used by parser to store current state when node is pushed into the parser stack + public object Tag; //for use by custom parsers, Irony does not use it + public TokenList Comments; //Comments preceding this node + + private ParseTreeNode() + { + ChildNodes = new ParseTreeNodeList(); + } + + public ParseTreeNode(Token token) : this() + { + Token = token; + Term = token.Terminal; + Precedence = Term.Precedence; + Associativity = token.Terminal.Associativity; + Span = new SourceSpan(token.Location, token.Length); + IsError = token.IsError(); + } + + public ParseTreeNode(ParserState initialState) : this() + { + State = initialState; + } + + public ParseTreeNode(NonTerminal term, SourceSpan span) : this() + { + Term = term; + Span = span; + } + + public override string ToString() + { + if (Term == null) + return "(S0)"; //initial state node + else + return Term.GetParseNodeCaption(this); + }//method + + public string FindTokenAndGetText() + { + var tkn = FindToken(); + return tkn?.Text; + } + + public Token FindToken() + { + return 
FindFirstChildTokenRec(this); + } + + private static Token FindFirstChildTokenRec(ParseTreeNode node) + { + if (node.Token != null) return node.Token; + foreach (var child in node.ChildNodes) + { + var tkn = FindFirstChildTokenRec(child); + if (tkn != null) return tkn; + } + return null; + } + + /// Returns true if the node is punctuation or it is transient with empty child list. + /// True if parser can safely ignore this node. + public bool IsPunctuationOrEmptyTransient() + { + if (Term.Flags.IsSet(TermFlags.IsPunctuation)) + return true; + if (Term.Flags.IsSet(TermFlags.IsTransient) && ChildNodes.Count == 0) + return true; + return false; + } + + public bool IsOperator() + { + return Term.Flags.IsSet(TermFlags.IsOperator); + } + }//class + + public class ParseTreeNodeList : List + { } + + public enum ParseTreeStatus + { + Parsing, + Partial, + Parsed, + Error, + } + + public class ParseTree + { + public ParseTreeStatus Status { get; internal set; } + public readonly string SourceText; + public readonly string FileName; + public readonly TokenList Tokens = new TokenList(); + public readonly TokenList OpenBraces = new TokenList(); + public ParseTreeNode Root; + public readonly LogMessageList ParserMessages = new LogMessageList(); + public long ParseTimeMilliseconds; + public object Tag; //custom data object, use it anyway you want + + public ParseTree(string sourceText, string fileName) + { + SourceText = sourceText; + FileName = fileName; + Status = ParseTreeStatus.Parsing; + } + + public bool HasErrors() + { + if (ParserMessages.Count == 0) return false; + foreach (var err in ParserMessages) + if (err.Level == ErrorLevel.Error) return true; + return false; + }//method + }//class +} \ No newline at end of file diff --git a/src/Irony/Parsing/Parsers/ParseTreeExtensions.cs b/src/Irony/Parsing/Parsers/ParseTreeExtensions.cs new file mode 100644 index 0000000..fe56db8 --- /dev/null +++ b/src/Irony/Parsing/Parsers/ParseTreeExtensions.cs @@ -0,0 +1,61 @@ +#region 
License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Parsers +{ +#if !SILVERLIGHT + //public static class ParseTreeExtensions { + // public static string ToXml(this ParseTree parseTree) { + // if (parseTree == null || parseTree.Root == null) return string.Empty; + // var xdoc = ToXmlDocument(parseTree); + // StringWriter sw = new StringWriter(); + // XmlTextWriter xw = new XmlTextWriter(sw); + // xw.Formatting = Formatting.Indented; + // xdoc.WriteTo(xw); + // xw.Flush(); + // return sw.ToString(); + // } + + // public static XmlDocument ToXmlDocument(this ParseTree parseTree) { + // var xdoc = new XmlDocument(); + // if (parseTree == null || parseTree.Root == null) return xdoc; + // var xTree = xdoc.CreateElement("ParseTree"); + // xdoc.AppendChild(xTree); + // var xRoot = parseTree.Root.ToXmlElement(xdoc); + // xTree.AppendChild(xRoot); + // return xdoc; + // } + + // public static XmlElement ToXmlElement(this ParseTreeNode node, XmlDocument ownerDocument) { + // var xElem = ownerDocument.CreateElement("Node"); + // xElem.SetAttribute("Term", node.Term.Name); + // var term = node.Term; + // if (term.HasAstConfig() && term.AstConfig.NodeType != null) + // xElem.SetAttribute("AstNodeType", term.AstConfig.NodeType.Name); + // if (node.Token != null) { + // xElem.SetAttribute("Terminal", node.Term.GetType().Name); + // //xElem.SetAttribute("Text", node.Token.Text); + // if (node.Token.Value != null) + // 
xElem.SetAttribute("Value", node.Token.Value.ToString()); + // } else + // foreach (var child in node.ChildNodes) { + // var xChild = child.ToXmlElement(ownerDocument); + // xElem.AppendChild(xChild); + // } + // return xElem; + // }//method + + //}//class +#endif +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Parsers/Parser.cs b/src/Irony/Parsing/Parsers/Parser.cs new file mode 100644 index 0000000..a0fe796 --- /dev/null +++ b/src/Irony/Parsing/Parsers/Parser.cs @@ -0,0 +1,293 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. 
+ * **********************************************************************************/ + +#endregion + +using Sanchime.Irony.Parsing.Parsers.ParserActions; +using System.Diagnostics; + +namespace Sanchime.Irony.Parsing.Parsers +{ + //Parser class represents combination of scanner and LALR parser (CoreParser) + public class Parser + { + public readonly LanguageData Language; + public readonly ParserData Data; + private Grammar _grammar; + + //public readonly CoreParser CoreParser; + public readonly Scanner Scanner; + + public ParsingContext Context { get; internal set; } + public readonly NonTerminal Root; + + // Either language root or initial state for parsing snippets - like Ruby's expressions in strings : "result= #{x+y}" + internal readonly ParserState InitialState; + + public Parser(Grammar grammar) : this(new LanguageData(grammar)) + { + } + + public Parser(LanguageData language) : this(language, null) + { + } + + public Parser(LanguageData language, NonTerminal root) + { + Language = language; + Data = Language.ParserData; + _grammar = Language.Grammar; + Context = new ParsingContext(this); + Scanner = new Scanner(this); + Root = root; + if (Root == null) + { + Root = Language.Grammar.Root; + InitialState = Language.ParserData.InitialState; + } + else + { + if (Root != Language.Grammar.Root && !Language.Grammar.SnippetRoots.Contains(Root)) + throw new Exception(string.Format(Resources.ErrRootNotRegistered, root.Name)); + InitialState = Language.ParserData.InitialStates[Root]; + } + } + + internal void Reset() + { + Context.Reset(); + Scanner.Reset(); + } + + public ParseTree Parse(string sourceText) + { + return Parse(sourceText, "Source"); + } + + public ParseTree Parse(string sourceText, string fileName) + { + SourceLocation loc = default; + Reset(); + /* if (Context.Status == ParserStatus.AcceptedPartial) { + var oldLoc = Context.Source.Location; + loc = new SourceLocation(oldLoc.Position, oldLoc.Line + 1, 0); + } else { + }*/ + Context.Source = new 
SourceStream(sourceText, Language.Grammar.CaseSensitive, Context.TabWidth, loc);
+            Context.CurrentParseTree = new ParseTree(sourceText, fileName);
+            Context.Status = ParserStatus.Parsing;
+            var sw = new Stopwatch();
+            sw.Start();
+            ParseAll();
+            //Set Parse status
+            var parseTree = Context.CurrentParseTree;
+            bool hasErrors = parseTree.HasErrors();
+            if (hasErrors)
+                parseTree.Status = ParseTreeStatus.Error;
+            else if (Context.Status == ParserStatus.AcceptedPartial)
+                parseTree.Status = ParseTreeStatus.Partial;
+            else
+                parseTree.Status = ParseTreeStatus.Parsed;
+            //Build AST if no errors and AST flag is set
+            bool createAst = _grammar.LanguageFlags.IsSet(LanguageFlags.CreateAst);
+            if (createAst && !hasErrors)
+                Language.Grammar.BuildAst(Language, parseTree);
+            //Done; record the time
+            sw.Stop();
+            parseTree.ParseTimeMilliseconds = sw.ElapsedMilliseconds;
+            if (parseTree.ParserMessages.Count > 0)
+                parseTree.ParserMessages.Sort(LogMessageList.ByLocation);
+            return parseTree;
+        }
+
+        /// <summary>
+        /// Main parser loop: sets the status to Parsing and executes one parser action
+        /// at a time until some action moves the status away from Parsing.
+        /// </summary>
+        private void ParseAll()
+        {
+            Context.Status = ParserStatus.Parsing;
+            while (Context.Status == ParserStatus.Parsing)
+                ExecuteNextAction();
+        }//ParseAll method
+
+        /// <summary>
+        /// Runs only the scanner over <paramref name="sourceText"/>, pulling tokens until the
+        /// scanner returns null or an EOF token. No parser actions are executed.
+        /// </summary>
+        /// <param name="sourceText">Text to scan.</param>
+        /// <param name="fileName">File name recorded in the new parse tree.</param>
+        /// <returns>The parse tree object created for this scan.</returns>
+        public ParseTree ScanOnly(string sourceText, string fileName)
+        {
+            Context.CurrentParseTree = new ParseTree(sourceText, fileName);
+            Context.Source = new SourceStream(sourceText, Language.Grammar.CaseSensitive, Context.TabWidth);
+            var eof = Language.Grammar.Eof;
+            Token scanned;
+            do
+            {
+                scanned = Scanner.GetToken();
+            } while (scanned != null && scanned.Terminal != eof);
+            return Context.CurrentParseTree;
+        }
+
+        #region Parser Action execution
+
+        /// <summary>
+        /// Executes a single parser step: reads input when needed, then either runs the
+        /// action found for the current state/input or starts error recovery.
+        /// </summary>
+        private void ExecuteNextAction()
+        {
+            //Read input only if the state has no default action. A state with a default action does not
+            // contain ExpectedSet, so parser cannot assist scanner when it needs to select terminal and
+            // the scanner therefore can fail.
+            if (Context.CurrentParserInput == null && Context.CurrentParserState.DefaultAction == null)
+                ReadInput();
+            //Scanner produced an error token - go straight to recovery
+            if (Context.CurrentParserInput != null && Context.CurrentParserInput.IsError)
+            {
+                RecoverFromError();
+                return;
+            }
+            var action = GetNextAction();
+            if (action == null)
+            {
+                //No action: either a partial (command line) input just ended, or it is a syntax error
+                if (!CheckPartialInputCompleted())
+                    RecoverFromError();
+                return;
+            }
+            //We have action. Write trace and execute it
+            if (Context.TracingEnabled)
+                Context.AddTrace(action.ToString());
+            action.Execute(Context);
+        }
+
+        /// <summary>
+        /// Finds the action for the current parser state and current input.
+        /// </summary>
+        /// <returns>
+        /// The state's default action if it has one; otherwise the action registered for the input's
+        /// key term, then for its main terminal, then (at EOF with NewLineBeforeEOF set) for NewLine;
+        /// null when none of these lookups succeeds.
+        /// </returns>
+        internal ParserAction GetNextAction()
+        {
+            var state = Context.CurrentParserState;
+            if (state.DefaultAction != null)
+                return state.DefaultAction;
+
+            var input = Context.CurrentParserInput;
+            ParserAction action;
+            //First try as keyterm/key symbol; for example if token text = "while", then first try it as a keyword "while";
+            // if this does not work, try as an identifier that happens to match a keyword but is in fact identifier
+            Token inputToken = input.Token;
+            var keyTerm = inputToken?.KeyTerm;
+            if (keyTerm != null && state.Actions.TryGetValue(keyTerm, out action))
+            {
+                #region comments
+
+                // Ok, we found match as a key term (keyword or special symbol)
+                // Backpatch the token's term. For example in most cases keywords would be recognized as Identifiers by Scanner.
+                // Identifier would also check with SymbolTerms table and set AsSymbol field to SymbolTerminal if there exist
+                // one for token content. So we first find action by Symbol if there is one; if we find action, then we
+                // patch token's main terminal to AsSymbol value. This is important for recognizing keywords (for colorizing),
+                // and for operator precedence algorithm to work when grammar uses operators like "AND", "OR", etc.
+                //TODO: This might be not quite correct action, and we can run into trouble with some languages that have keywords that
+                // are not reserved words. But proper implementation would require substantial addition to parser code:
+                // when running into errors, we need to check the stack for places where we made this "interpret as Symbol"
+                // decision, roll back the stack and try to reinterpret as identifier
+
+                #endregion
+
+                inputToken.SetTerminal(keyTerm);
+                input.Term = keyTerm;
+                input.Precedence = keyTerm.Precedence;
+                input.Associativity = keyTerm.Associativity;
+                return action;
+            }
+            //Try to get by main Terminal, only if it is not the same as symbol
+            if (state.Actions.TryGetValue(input.Term, out action))
+                return action;
+            //If input is EOF and NewLineBeforeEof flag is set, try using NewLine to find action
+            bool useNewLine = input.Term == _grammar.Eof
+                && _grammar.LanguageFlags.IsSet(LanguageFlags.NewLineBeforeEOF)
+                && state.Actions.TryGetValue(_grammar.NewLine, out action);
+            if (!useNewLine)
+                return null;
+            //There's no action for EOF but there's action for NewLine. Let's add newLine token as input, just in case
+            // action code wants to check input - it should see NewLine.
+            var newLineToken = new Token(_grammar.NewLine, input.Token.Location, "\r\n", null);
+            Context.CurrentParserInput = new ParseTreeNode(newLineToken);
+            return action;
+        }
+
+        #endregion
+
+        #region reading input
+
+        /// <summary>
+        /// Reads the next grammar-relevant token from the scanner and stores it as the current parser input.
+        /// Comment tokens are accumulated in the context and attached to the next Content token.
+        /// </summary>
+        public void ReadInput()
+        {
+            Token token;
+            Terminal term;
+            //Get token from scanner while skipping all non-grammar tokens (comment tokens are accumulated in comment block)
+            do
+            {
+                // ScanOnly treats a null token as end of input; do the same here instead of
+                // failing with a NullReferenceException on token.Terminal.
+                token = Scanner.GetToken()
+                    ?? new Token(_grammar.Eof, Context.Source.Location, string.Empty, null);
+                term = token.Terminal;
+                if (term.Category == TokenCategory.Comment)
+                    Context.CurrentCommentTokens.Add(token);
+            } while (term.Flags.IsSet(TermFlags.IsNonGrammar) && term != _grammar.Eof);
+            //Check brace token; an unmatched closing brace is replaced with a syntax error token
+            if (term.Flags.IsSet(TermFlags.IsBrace) && !CheckBraceToken(token))
+                token = new Token(_grammar.SyntaxError, token.Location, token.Text,
+                    string.Format(Resources.ErrUnmatchedCloseBrace, token.Text));
+            //Create parser input node
+            Context.CurrentParserInput = new ParseTreeNode(token);
+            //attach comments if any accumulated to content token
+            if (token.Terminal.Category == TokenCategory.Content)
+            {
+                Context.CurrentParserInput.Comments = Context.CurrentCommentTokens;
+                Context.CurrentCommentTokens = new TokenList();
+            }
+            //Fire event on Terminal
+            token.Terminal.OnParserInputPreview(Context);
+        }
+
+        #endregion
+
+        #region Error Recovery
+
+        /// <summary>Executes the error action stored in the parser data against the current context.</summary>
+        public void RecoverFromError()
+        {
+            Data.ErrorAction.Execute(Context);
+        }
+
+        #endregion
+
+        #region Utilities
+
+        /// <summary>
+        /// In CommandLine mode, treats EOF as the end of a partial input: sets status to
+        /// AcceptedPartial and clears the current input.
+        /// </summary>
+        /// <returns>true if the partial input was accepted; otherwise false.</returns>
+        private bool CheckPartialInputCompleted()
+        {
+            if (Context.Mode != ParseMode.CommandLine || Context.CurrentParserInput.Term != _grammar.Eof)
+                return false;
+            Context.Status = ParserStatus.AcceptedPartial;
+            // clean up EOF in input so we can continue parsing next line
+            Context.CurrentParserInput = null;
+            return true;
+        }
+
+        // We assume here that the token is a brace (opening or closing)
+        private bool CheckBraceToken(Token token)
+        {
+            if
(token.Terminal.Flags.IsSet(TermFlags.IsOpenBrace))
+            {
+                Context.OpenBraces.Push(token);
+                return true;
+            }
+            //it is closing brace; it must pair with the opening brace on top of the stack
+            var openBraces = Context.OpenBraces;
+            if (openBraces.Count == 0 || openBraces.Peek().Terminal.IsPairFor != token.Terminal)
+                return false;
+            //Pop the opening brace and link both tokens to each other
+            var opening = openBraces.Pop();
+            opening.OtherBrace = token;
+            token.OtherBrace = opening;
+            return true;
+        }
+
+        #endregion
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParserActions/AcceptParserAction.cs b/src/Irony/Parsing/Parsers/ParserActions/AcceptParserAction.cs
new file mode 100644
index 0000000..00937fc
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParserActions/AcceptParserAction.cs
@@ -0,0 +1,16 @@
+namespace Sanchime.Irony.Parsing.Parsers.ParserActions
+{
+    /// <summary>Parser action that finishes a parse by accepting the input.</summary>
+    public class AcceptParserAction : ParserAction
+    {
+        /// <summary>
+        /// Pops the top node of the parser stack, makes it the root of the current parse tree
+        /// and sets the context status to Accepted.
+        /// </summary>
+        public override void Execute(ParsingContext context)
+        {
+            var rootNode = context.ParserStack.Pop(); //Pop root
+            context.CurrentParseTree.Root = rootNode;
+            context.Status = ParserStatus.Accepted;
+        }
+
+        /// <summary>Returns the accept-action label taken from Resources.</summary>
+        public override string ToString()
+        {
+            return Resources.LabelActionAccept;
+        }
+    }//class
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParserActions/ErrorRecoveryParserAction.cs b/src/Irony/Parsing/Parsers/ParserActions/ErrorRecoveryParserAction.cs
new file mode 100644
index 0000000..25f3fd5
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParserActions/ErrorRecoveryParserAction.cs
@@ -0,0 +1,95 @@
+namespace Sanchime.Irony.Parsing.Parsers.ParserActions
+{
+    //TODO: Improve recovery by adding automatic injection of missing tokens.
+    // Make sure we ALWAYS have output parse tree, even if it is messed up
+    public class ErrorRecoveryParserAction : ParserAction
+    {
+        /// <summary>
+        /// Reports the parse error through the grammar and, unless the input is at EOF or the
+        /// context is in CommandLine mode, attempts recovery. Leaves the status at Parsing when
+        /// recovery succeeds and at Error otherwise.
+        /// </summary>
+        public override void Execute(ParsingContext context)
+        {
+            context.Status = ParserStatus.Error;
+            var grammar = context.Language.Grammar;
+            grammar.ReportParseError(context);
+            // Do not recover if we're already at EOF, or if we're in command line mode
+            if (context.CurrentParserInput.Term == grammar.Eof || context.Mode == ParseMode.CommandLine)
+                return;
+            //Try to recover from error
+            context.Status = ParserStatus.Recovering;
+            context.AddTrace(Resources.MsgTraceRecovering); // *** RECOVERING - searching for state with error shift ***
+            bool recovered = TryRecoverFromError(context);
+            context.AddTrace(recovered ? Resources.MsgTraceRecoverSuccess : Resources.MsgTraceRecoverFailed);
+            context.Status = recovered ? ParserStatus.Parsing : ParserStatus.Error;
+        }
+
+        /// <summary>
+        /// Tries to resynchronize the parser using an error production of the grammar.
+        /// </summary>
+        /// <returns>
+        /// true once a reduce action has been executed after shifting the error token; false if no
+        /// state with an error shift is found in the stack or EOF is reached first.
+        /// </returns>
+        protected bool TryRecoverFromError(ParsingContext context)
+        {
+            var grammar = context.Language.Grammar;
+            var parser = context.Parser;
+            //1. We need to find a state in the stack that has a shift item based on error production (with error token),
+            // and error terminal is current. This state would have a shift action on error token.
+            ParserAction errorShift = FindErrorShiftActionInStack(context);
+            if (errorShift == null)
+                return false; //we failed to recover
+            context.AddTrace(Resources.MsgTraceRecoverFoundState, context.CurrentParserState);
+            //2. Shift error token - execute shift action
+            context.AddTrace(Resources.MsgTraceRecoverShiftError, errorShift);
+            errorShift.Execute(context);
+            //3. Now we need to go along error production until the end, shifting tokens that CAN be shifted and ignoring others.
+            // We shift until we can reduce
+            context.AddTrace(Resources.MsgTraceRecoverShiftTillEnd);
+            while (true)
+            {
+                if (context.CurrentParserInput == null)
+                    parser.ReadInput();
+                if (context.CurrentParserInput.Term == grammar.Eof)
+                    return false;
+                var nextAction = parser.GetNextAction();
+                if (nextAction == null)
+                {
+                    //Current input cannot be used in this state - skip it
+                    parser.ReadInput();
+                    continue;
+                }
+                bool isReduce = nextAction is ReduceParserAction;
+                if (isReduce)
+                {
+                    //We are reducing a fragment containing error - this is the end of recovery
+                    //Clear all input token queues and buffered input, reset location back to input position token queues;
+                    context.SetSourceLocation(context.CurrentParserInput.Span.Location);
+
+                    //Reduce error production - it creates parent non-terminal that "hides" error inside
+                    context.AddTrace(Resources.MsgTraceRecoverReducing);
+                }
+                // Reduce ends the recovery; anything else (most likely shift) is simply executed
+                context.AddTrace(Resources.MsgTraceRecoverAction, nextAction);
+                nextAction.Execute(context);
+                if (isReduce)
+                    return true; //we recovered
+            }
+        }//method
+
+        /// <summary>
+        /// Pops the parser stack until the current state has a shift action on the grammar's
+        /// SyntaxError terminal. The bottom (initial) stack entry is never popped.
+        /// </summary>
+        /// <returns>The shift action found, or null if none exists down to the initial state.</returns>
+        private ParserAction FindErrorShiftActionInStack(ParsingContext context)
+        {
+            var grammar = context.Language.Grammar;
+            while (context.ParserStack.Count >= 1)
+            {
+                if (context.CurrentParserState.Actions.TryGetValue(grammar.SyntaxError, out var candidate)
+                    && candidate is ShiftParserAction)
+                    return candidate;
+                //pop next state from stack
+                if (context.ParserStack.Count == 1)
+                    return null; //don't pop the initial state
+                context.ParserStack.Pop();
+                context.CurrentParserState = context.ParserStack.Top.State;
+            }
+            return null;
+        }
+    }//class
+}//ns
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParserActions/ReduceParserActions.cs b/src/Irony/Parsing/Parsers/ParserActions/ReduceParserActions.cs
new file mode 100644
index 
0000000..ba50671
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParserActions/ReduceParserActions.cs
@@ -0,0 +1,182 @@
+namespace Sanchime.Irony.Parsing.Parsers.ParserActions
+{
+    /// <summary>Base class for more specific reduce actions.</summary>
+    public partial class ReduceParserAction : ParserAction
+    {
+        public readonly Production Production;
+
+        public ReduceParserAction(Production production)
+        {
+            Production = production;
+        }
+
+        public override string ToString()
+        {
+            return string.Format(Resources.LabelActionReduce, Production.ToStringQuoted());
+        }
+
+        /// <summary>Factory method for creating a proper type of reduce parser action.</summary>
+        /// <param name="production">A Production to reduce.</param>
+        /// <returns>Reduce action.</returns>
+        public static ReduceParserAction Create(Production production)
+        {
+            var flags = production.LValue.Flags;
+            //List builder (non-empty production for list non-terminal) is a special case
+            bool buildsList = flags.IsSet(TermFlags.IsList)
+                && production.RValues.Count > 0
+                && production.RValues[0] == production.LValue;
+            if (buildsList)
+                return new ReduceListBuilderParserAction(production);
+            if (flags.IsSet(TermFlags.IsListContainer))
+                return new ReduceListContainerParserAction(production);
+            if (flags.IsSet(TermFlags.IsTransient))
+                return new ReduceTransientParserAction(production);
+            return new ReduceParserAction(production);
+        }
+
+        /// <summary>
+        /// Performs the reduce. The result node is temporarily placed into CurrentParserInput
+        /// while the reduce completes; the previous input is put back afterwards.
+        /// </summary>
+        public override void Execute(ParsingContext context)
+        {
+            var pendingInput = context.CurrentParserInput;
+            context.CurrentParserInput = GetResultNode(context);
+            CompleteReduce(context);
+            context.CurrentParserInput = pendingInput;
+        }
+
+        /// <summary>
+        /// Builds the node for the production's non-terminal from the top stack entries,
+        /// leaving out punctuation and empty transient nodes.
+        /// </summary>
+        protected virtual ParseTreeNode GetResultNode(ParsingContext context)
+        {
+            var childCount = Production.RValues.Count;
+            int firstChildIndex = context.ParserStack.Count - childCount;
+            var result = new ParseTreeNode(Production.LValue, context.ComputeStackRangeSpan(childCount));
+            for (int index = firstChildIndex; index < firstChildIndex + childCount; index++)
+            {
+                var child = context.ParserStack[index];
+                if (!child.IsPunctuationOrEmptyTransient())
+                    result.ChildNodes.Add(child);
+            }
+            return result;
+        }
+
+        //Completes reduce: pops child nodes from the stack and pushes result node into the stack
+        protected void CompleteReduce(ParsingContext context)
+        {
+            var resultNode = context.CurrentParserInput;
+            var childCount = Production.RValues.Count;
+            //Pop stack
+            context.ParserStack.Pop(childCount);
+            //Copy comment block from first child; if comments precede child node, they precede the parent as well.
+            if (resultNode.ChildNodes.Count > 0)
+                resultNode.Comments = resultNode.ChildNodes[0].Comments;
+            //Inherit precedence and associativity, to cover a standard case: BinOp->+|-|*|/;
+            // BinOp node should inherit precedence from underlying operator symbol.
+            //TODO: this special case will be handled differently. A ToTerm method should be expanded to allow "combined" terms like "NOT LIKE".
+            // OLD COMMENT: A special case is SQL operator "NOT LIKE" which consists of 2 tokens. We therefore inherit "max" precedence from any children
+            if (Production.LValue.Flags.IsSet(TermFlags.InheritPrecedence))
+                InheritPrecedence(resultNode);
+            //Push new node into stack and move to new state
+            //First read the state from top of the stack
+            context.CurrentParserState = context.ParserStack.Top.State;
+            if (context.TracingEnabled)
+                context.AddTrace(Resources.MsgTracePoppedState, Production.LValue.Name);
+
+            #region comments on special case
+
+            //Special case: if a non-terminal is Transient (ex: BinOp), then result node is not this NonTerminal, but its child (ex: symbol).
+            // Shift action will invoke OnShifting on actual term being shifted (symbol); we need to invoke Shifting even on NonTerminal itself
+            // - this would be more expected behavior in general. ImpliedPrecHint relies on this
+
+            #endregion comments on special case
+
+            if (resultNode.Term != Production.LValue) //special case
+                Production.LValue.OnShifting(context.SharedParsingEventArgs);
+            // Shift to new state - execute shift over the non-terminal of the production.
+            var shift = context.CurrentParserState.Actions[Production.LValue];
+            // Execute shift to new state
+            shift.Execute(context);
+            //Invoke Reduce event
+            Production.LValue.OnReduced(context, Production, resultNode);
+        }
+
+        //This operation helps in situation when Bin expression is declared as BinExpr.Rule = expr + BinOp + expr;
+        // where BinOp is an OR-combination of operators.
+        // During parsing, when 'expr, BinOp, expr' is on the top of the stack,
+        // and incoming symbol is operator, we need to use precedence rule for deciding on the action.
+        // The node takes precedence and associativity from its first child that has a precedence set.
+        private void InheritPrecedence(ParseTreeNode node)
+        {
+            foreach (var child in node.ChildNodes)
+            {
+                if (child.Precedence == BnfTerm.NoPrecedence)
+                    continue;
+                node.Precedence = child.Precedence;
+                node.Associativity = child.Associativity;
+                return;
+            }
+        }
+    }//class
+
+    /// <summary>Reduces non-terminal marked as Transient by MarkTransient method.</summary>
+    public class ReduceTransientParserAction : ReduceParserAction
+    {
+        public ReduceTransientParserAction(Production production) : base(production)
+        {
+        }
+
+        /// <summary>
+        /// Walks the production's stack entries from the top of the stack downwards and returns
+        /// the first one that is neither punctuation nor an empty transient node.
+        /// </summary>
+        protected override ParseTreeNode GetResultNode(ParsingContext context)
+        {
+            var topIndex = context.ParserStack.Count - 1;
+            var childCount = Production.RValues.Count;
+            for (int offset = 0; offset < childCount; offset++)
+            {
+                var candidate = context.ParserStack[topIndex - offset];
+                if (!candidate.IsPunctuationOrEmptyTransient())
+                    return candidate;
+            }
+            //Otherwise return an empty transient node; if it is part of the list, the list will skip it
+            return new ParseTreeNode(Production.LValue, context.ComputeStackRangeSpan(childCount));
+        }
+    }//class
+
+    /// <summary>Reduces list created by MakePlusRule or MakeListRule methods.</summary>
+    public class ReduceListBuilderParserAction : ReduceParserAction
+    {
+        public ReduceListBuilderParserAction(Production production) : base(production)
+        {
+        }
+
+        /// <summary>
+        /// Reuses the list node that is the first child of the production: widens its span and
+        /// appends the node at the top of the stack unless that node is punctuation or an empty transient.
+        /// </summary>
+        protected override ParseTreeNode GetResultNode(ParsingContext context)
+        {
+            int childCount = Production.RValues.Count;
+            int firstChildIndex = context.ParserStack.Count - childCount;
+            var listNode = context.ParserStack[firstChildIndex]; //get the list already created - it is the first child node
+            listNode.Span = context.ComputeStackRangeSpan(childCount);
+            var newMember = context.ParserStack.Top; //next list member is the last child - at the top of the stack
+            if (!newMember.IsPunctuationOrEmptyTransient())
+                listNode.ChildNodes.Add(newMember);
+            return listNode;
+        }
+    }//class
+
+    //List container is an artificial non-terminal created by MakeStarRule method; the actual list is a direct child.
+    public class ReduceListContainerParserAction : ReduceParserAction
+    {
+        public ReduceListContainerParserAction(Production production) : base(production)
+        {
+        }
+
+        /// <summary>
+        /// Creates the container node and copies into it the child nodes of the list node that
+        /// is the production's first child. An empty production yields a node with no children.
+        /// </summary>
+        protected override ParseTreeNode GetResultNode(ParsingContext context)
+        {
+            int childCount = Production.RValues.Count;
+            int firstChildIndex = context.ParserStack.Count - childCount;
+            var container = new ParseTreeNode(Production.LValue, context.ComputeStackRangeSpan(childCount));
+            if (childCount == 0)
+                return container; //empty production - might happen for MakeStarRule
+            var listNode = context.ParserStack[firstChildIndex]; //get the transient list with all members - it is the first child node
+            container.ChildNodes.AddRange(listNode.ChildNodes); //copy all list members
+            return container;
+        }
+    }//class
+}//ns
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParserActions/ShiftParserAction.cs b/src/Irony/Parsing/Parsers/ParserActions/ShiftParserAction.cs
new file mode 100644
index 0000000..ce47350
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParserActions/ShiftParserAction.cs
@@ -0,0 +1,35 @@
+namespace Sanchime.Irony.Parsing.Parsers.ParserActions
+{
+    /// <summary>Parser action that pushes the current input onto the stack and moves to <see cref="NewState"/>.</summary>
+    public class ShiftParserAction : ParserAction
+    {
+        public readonly BnfTerm Term;
+        public readonly ParserState NewState;
+
+        public ShiftParserAction(LRItem item) : this(item.Core.Current, item.ShiftedItem.State)
+        {
+        }
+
+        /// <summary>Creates a shift over <paramref name="term"/> into <paramref name="newState"/>.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="newState"/> is null.</exception>
+        public ShiftParserAction(BnfTerm term, ParserState newState)
+        {
+            // ArgumentNullException derives from Exception, so existing catch blocks keep working.
+            // String concatenation (rather than term.ToString()) keeps the message safe for a null term.
+            if (newState == null)
+                throw new ArgumentNullException(nameof(newState),
+                    "ParserShiftAction: newState may not be null. term: " + term);
+
+            Term = term;
+            NewState = newState;
+        }
+
+        /// <summary>
+        /// Raises OnShifting on the input's term, pushes the input with the new state onto the
+        /// parser stack, switches to the new state and clears the current input.
+        /// </summary>
+        public override void Execute(ParsingContext context)
+        {
+            var shifted = context.CurrentParserInput;
+            shifted.Term.OnShifting(context.SharedParsingEventArgs);
+            context.ParserStack.Push(shifted, NewState);
+            context.CurrentParserState = NewState;
+            context.CurrentParserInput = null;
+        }
+
+        public override string ToString()
+        {
+            return string.Format(Resources.LabelActionShift, NewState.Name);
+        }
+    }//class
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParserActions/_ParserAction.cs b/src/Irony/Parsing/Parsers/ParserActions/_ParserAction.cs
new file mode 100644
index 0000000..2a494b2
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParserActions/_ParserAction.cs
@@ -0,0 +1,20 @@
+namespace Sanchime.Irony.Parsing.Parsers.ParserActions
+{
+    /// <summary>Base class of all parser actions; the default <see cref="Execute"/> does nothing.</summary>
+    public abstract partial class ParserAction
+    {
+        public ParserAction()
+        { }
+
+        public virtual void Execute(ParsingContext context)
+        {
+        }
+
+        public override string ToString()
+        {
+            return Resources.LabelActionUnknown; //should never happen
+        }
+    }//class ParserAction
+
+    // NOTE(review): the generic arguments were missing here (a bare "Dictionary" does not compile).
+    // BnfTerm -> ParserAction matches how state Actions tables are indexed elsewhere in this
+    // change set; presumably those tables are of this type - confirm against ParserState.
+    public class ParserActionTable : Dictionary<BnfTerm, ParserAction>
+    { }
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParserDataPrinter.cs b/src/Irony/Parsing/Parsers/ParserDataPrinter.cs
new file mode 100644
index 0000000..76cd98b
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParserDataPrinter.cs
@@ -0,0 +1,99 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+using Sanchime.Irony.Parsing.Parsers.ParserActions;
+
+namespace Sanchime.Irony.Parsing.Parsers
+{
+    /// <summary>Produces text dumps of parser states, terminals and non-terminals.</summary>
+    public static class ParserDataPrinter
+    {
+        /// <summary>
+        /// Prints every parser state: its name, conflicts, shift and reduce items, and the
+        /// transitions taken from its shift actions.
+        /// </summary>
+        public static string PrintStateList(LanguageData language)
+        {
+            var output = new StringBuilder();
+            foreach (ParserState state in language.ParserData.States)
+            {
+                output.Append("State " + state.Name);
+                if (state.BuilderData.IsInadequate)
+                    output.Append(" (Inadequate)");
+                output.AppendLine();
+
+                var shiftReduce = state.BuilderData.GetShiftReduceConflicts();
+                if (shiftReduce.Count > 0)
+                    output.AppendLine(" Shift-reduce conflicts on inputs: " + shiftReduce.ToString());
+                var reduceReduce = state.BuilderData.GetReduceReduceConflicts();
+                if (reduceReduce.Count > 0)
+                    output.AppendLine(" Reduce-reduce conflicts on inputs: " + reduceReduce.ToString());
+
+                //LRItems
+                if (state.BuilderData.ShiftItems.Count > 0)
+                {
+                    output.AppendLine(" Shift items:");
+                    foreach (var shiftItem in state.BuilderData.ShiftItems)
+                        output.AppendLine(" " + shiftItem.ToString());
+                }
+                if (state.BuilderData.ReduceItems.Count > 0)
+                {
+                    output.AppendLine(" Reduce items:");
+                    foreach (LRItem reduceItem in state.BuilderData.ReduceItems)
+                    {
+                        var text = reduceItem.ToString();
+                        if (reduceItem.Lookaheads.Count > 0)
+                            text += " [" + reduceItem.Lookaheads.ToString() + "]";
+                        output.AppendLine(" " + text);
+                    }
+                }
+
+                //Only shift actions are listed as transitions
+                output.Append(" Transitions: ");
+                var separator = string.Empty;
+                foreach (BnfTerm key in state.Actions.Keys)
+                {
+                    if (!(state.Actions[key] is ShiftParserAction shift))
+                        continue;
+                    output.Append(separator);
+                    separator = ", ";
+                    output.Append(key.ToString());
+                    output.Append("->");
+                    output.Append(shift.NewState.Name);
+                }
+                output.AppendLine();
+                output.AppendLine();
+            }//foreach
+            return output.ToString();
+        }
+
+        /// <summary>Prints the grammar's terminals sorted by name, one per line.</summary>
+        public static string PrintTerminals(LanguageData language)
+        {
+            var terminals = language.GrammarData.Terminals.ToList();
+            terminals.Sort((left, right) => string.Compare(left.Name, right.Name));
+            return string.Join(Environment.NewLine, terminals);
+        }
+
+        /// <summary>
+        /// Prints the grammar's non-terminals sorted by name, each followed by its productions;
+        /// nullable non-terminals are marked.
+        /// </summary>
+        public static string PrintNonTerminals(LanguageData language)
+        {
+            var output = new StringBuilder();
+            var nonTerminals = language.GrammarData.NonTerminals.ToList();
+            nonTerminals.Sort((left, right) => string.Compare(left.Name, right.Name));
+            foreach (var nonTerminal in nonTerminals)
+            {
+                output.Append(nonTerminal.Name);
+                if (nonTerminal.Flags.IsSet(TermFlags.IsNullable))
+                    output.Append(" (Nullable) ");
+                output.AppendLine();
+                foreach (Production production in nonTerminal.Productions)
+                {
+                    output.Append(" ");
+                    output.AppendLine(production.ToString());
+                }
+            }//foreach nt
+            return output.ToString();
+        }
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParserStack.cs b/src/Irony/Parsing/Parsers/ParserStack.cs
new file mode 100644
index 0000000..07f92d1
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParserStack.cs
@@ -0,0 +1,61 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Parsers
+{
+    /// <summary>
+    /// Parser stack of parse tree nodes, stored in a list whose last element is the top.
+    /// </summary>
+    // The element type was missing from the base class (a bare "List" does not compile);
+    // Push, Pop and Top below all work with ParseTreeNode.
+    public class ParserStack : List<ParseTreeNode>
+    {
+        public ParserStack() : base(200)
+        {
+        }
+
+        /// <summary>Pushes a node without touching its state.</summary>
+        public void Push(ParseTreeNode nodeInfo)
+        {
+            Add(nodeInfo);
+        }
+
+        /// <summary>Assigns <paramref name="state"/> to the node and pushes it.</summary>
+        public void Push(ParseTreeNode nodeInfo, ParserState state)
+        {
+            nodeInfo.State = state;
+            Add(nodeInfo);
+        }
+
+        /// <summary>Removes and returns the top node.</summary>
+        /// <exception cref="ArgumentOutOfRangeException">The stack is empty.</exception>
+        public ParseTreeNode Pop()
+        {
+            var top = Top;
+            RemoveAt(Count - 1);
+            return top;
+        }
+
+        /// <summary>Removes the top <paramref name="count"/> nodes.</summary>
+        public void Pop(int count)
+        {
+            RemoveRange(Count - count, count);
+        }
+
+        /// <summary>Pops nodes until at most <paramref name="finalCount"/> remain; does nothing if the stack is already that small.</summary>
+        public void PopUntil(int finalCount)
+        {
+            if (finalCount < Count)
+                Pop(Count - finalCount);
+        }
+
+        /// <summary>The node on top of the stack, or null when the stack is empty.</summary>
+        public ParseTreeNode Top
+        {
+            get
+            {
+                if (Count == 0) return null;
+                return base[Count - 1];
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParserTrace.cs b/src/Irony/Parsing/Parsers/ParserTrace.cs
new file mode 100644
index 0000000..cec13f6
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParserTrace.cs
@@ -0,0 +1,52 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Parsers
+{
+    /// <summary>
+    /// One trace record: the parser state, stack top and input it was created with,
+    /// plus a message and an error flag.
+    /// </summary>
+    public class ParserTraceEntry
+    {
+        public ParserState State;
+        public ParseTreeNode StackTop;
+        public ParseTreeNode Input;
+        public string Message;
+        public bool IsError;
+
+        public ParserTraceEntry(ParserState state, ParseTreeNode stackTop, ParseTreeNode input, string message, bool isError)
+        {
+            State = state;
+            StackTop = stackTop;
+            Input = input;
+            Message = message;
+            IsError = isError;
+        }
+    }//class
+
+    /// <summary>List of trace entries.</summary>
+    // The element type was missing from the base class (a bare "List" does not compile);
+    // ParsingContext adds ParserTraceEntry instances to this list.
+    public class ParserTrace : List<ParserTraceEntry>
+    { }
+
+    /// <summary>Event arguments carrying a single trace entry.</summary>
+    public class ParserTraceEventArgs : EventArgs
+    {
+        public ParserTraceEventArgs(ParserTraceEntry entry)
+        {
+            Entry = entry;
+        }
+
+        public readonly ParserTraceEntry Entry;
+
+        /// <summary>Returns the string form of the wrapped entry.</summary>
+        public override string ToString()
+        {
+            return Entry.ToString();
+        }
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Parsers/ParsingContext.cs b/src/Irony/Parsing/Parsers/ParsingContext.cs
new file mode 100644
index 0000000..af8297b
--- /dev/null
+++ b/src/Irony/Parsing/Parsers/ParsingContext.cs
@@ -0,0 +1,321 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
[Flags]
public enum ParseOptions
{
    Reserved = 0x01,
    AnalyzeCode = 0x10, //run code analysis; effective only in Module mode
}

public enum ParseMode
{
    File,        //default, continuous input file
    VsLineScan,  //line-by-line scanning in VS integration for syntax highlighting
    CommandLine, //line-by-line from console
}

public enum ParserStatus
{
    Init,       //initial state
    Parsing,
    Previewing, //previewing tokens
    Recovering, //recovering from error
    Accepted,
    AcceptedPartial,
    Error,
}

/// <summary>
/// Container for the information shared between parser, scanner and token filters
/// during a parse: settings, current state/input/token, error flag, trace and scratch values.
/// </summary>
public partial class ParsingContext
{
    public readonly Parser Parser;
    public readonly LanguageData Language;

    //Parser settings
    public ParseOptions Options;

    public bool TracingEnabled;
    public ParseMode Mode = ParseMode.File;
    public int MaxErrors = 20; //maximum error count to report
    public CultureInfo Culture; //defaults to Grammar.DefaultCulture, might be changed by app code

    #region properties and fields

    //Parser fields
    public ParseTree CurrentParseTree { get; internal set; }

    public readonly TokenStack OpenBraces = new TokenStack();
    public ParserTrace ParserTrace = new ParserTrace();
    internal readonly ParserStack ParserStack = new ParserStack();

    public ParserState CurrentParserState { get; internal set; }
    public ParseTreeNode CurrentParserInput { get; internal set; }
    public Token CurrentToken; //The token just scanned by Scanner
    public TokenList CurrentCommentTokens = new TokenList(); //accumulated comment tokens
    public Token PreviousToken;
    public SourceLocation PreviousLineStart; //Location of last line start

    //list for terminals - for current parser state and current input char
    public TerminalList CurrentTerminals = new TerminalList();

    public ISourceStream Source;

    //Internal fields
    internal TokenFilterList TokenFilters = new TokenFilterList();

    internal TokenStack BufferedTokens = new TokenStack();

    // NOTE(review): element type reconstructed as Token ("stream of tokens after filter") - confirm.
    internal IEnumerator<Token> FilteredTokens; //stream of tokens after filter
    internal TokenStack PreviewTokens = new TokenStack();
    internal ParsingEventArgs SharedParsingEventArgs;
    internal ValidateTokenEventArgs SharedValidateTokenEventArgs;

    public VsScannerStateMap VsLineScanState; //State variable used in line scanning mode for VS integration

    public ParserStatus Status { get; internal set; }
    public bool HasErrors; //error flag, once set remains set (until Reset)

    //values dictionary to use by custom language implementations to save some temporary values during parsing
    // NOTE(review): key/value types reconstructed as string/object - confirm against callers.
    public readonly Dictionary<string, object> Values = new Dictionary<string, object>();

    public int TabWidth = 8;

    #endregion

    #region constructors

    /// <summary>
    /// Creates a context bound to <paramref name="parser"/>, taking the language from the parser and
    /// the culture from the grammar's default culture.
    /// </summary>
    public ParsingContext(Parser parser)
    {
        Parser = parser;
        Language = Parser.Language;
        Culture = Language.Grammar.DefaultCulture;
        //This might be a problem for multi-threading - if we have several contexts on parallel threads with different culture.
        //Resources.Culture is static property (this is not Irony's fault, this is auto-generated file).
        Resources.Culture = Culture;
        SharedParsingEventArgs = new ParsingEventArgs(this);
        SharedValidateTokenEventArgs = new ValidateTokenEventArgs(this);
    }

    #endregion

    #region Events: TokenCreated

    // Handlers receive the shared ParsingEventArgs instance (see OnTokenCreated).
    public event EventHandler<ParsingEventArgs> TokenCreated;

    /// <summary>Raises <see cref="TokenCreated"/> with the shared event-args instance.</summary>
    internal void OnTokenCreated()
    {
        // ?.Invoke reads the delegate once, so a handler unsubscribing between the null check
        // and the call cannot cause a NullReferenceException.
        TokenCreated?.Invoke(this, SharedParsingEventArgs);
    }

    #endregion

    #region Error handling and tracing

    /// <summary>
    /// Creates a syntax-error token from the source stream; <paramref name="message"/> is formatted
    /// with <paramref name="args"/> only when arguments are supplied.
    /// </summary>
    public Token CreateErrorToken(string message, params object[] args)
    {
        if (args != null && args.Length > 0)
            message = string.Format(message, args);
        return Source.CreateToken(Language.Grammar.SyntaxError, message);
    }

    /// <summary>
    /// Sets <see cref="HasErrors"/> and records an error at the current parser input location,
    /// or at the source location when there is no current input.
    /// </summary>
    public void AddParserError(string message, params object[] args)
    {
        var location = CurrentParserInput == null ? Source.Location : CurrentParserInput.Span.Location;
        HasErrors = true;
        AddParserMessage(ErrorLevel.Error, location, message, args);
    }

    /// <summary>
    /// Adds a message to the current parse tree. Does nothing when there is no parse tree or when
    /// the tree already holds <see cref="MaxErrors"/> messages. Mirrors the message into the trace
    /// when tracing is enabled.
    /// </summary>
    public void AddParserMessage(ErrorLevel level, SourceLocation location, string message, params object[] args)
    {
        if (CurrentParseTree == null) return;
        if (CurrentParseTree.ParserMessages.Count >= MaxErrors) return;
        if (args != null && args.Length > 0)
            message = string.Format(message, args);
        CurrentParseTree.ParserMessages.Add(new LogMessage(level, location, message, CurrentParserState));
        if (TracingEnabled)
            AddTrace(true, message); // message is already formatted; no args are passed on
    }

    /// <summary>Adds a non-error trace entry (no-op when tracing is disabled).</summary>
    public void AddTrace(string message, params object[] args)
    {
        AddTrace(false, message, args);
    }

    /// <summary>
    /// Adds a trace entry capturing the current state, stack top and input (no-op when tracing is disabled).
    /// </summary>
    public void AddTrace(bool asError, string message, params object[] args)
    {
        if (!TracingEnabled)
            return;
        if (args != null && args.Length > 0)
            message = string.Format(message, args);
        ParserTrace.Add(new ParserTraceEntry(CurrentParserState, ParserStack.Top, CurrentParserInput, message, asError));
    }

    #region comments

    // Computes set of expected terms in a parser state. While there may be extended list of symbols expected at some point,
    // we want to reorganize and reduce it. For example, if the current state expects all arithmetic operators as an input,
    // it would be better to not list all operators (+, -, *, /, etc) but simply put "operator" covering them all.
    // To achieve this grammar writer can group operators (or any other terminals) into named groups using Grammar's methods
    // AddTermReportGroup, AddNoReportGroup etc. Then instead of reporting each operator separately, Irony would include
    // a single "group name" to represent them all.
    // The "expected report set" is not computed during parser construction (it would take considerable time),
    // but does it on demand during parsing, when error is detected and the expected set is actually needed for error message.
    // Multi-threading concerns. When used in multi-threaded environment (web server), the LanguageData would be shared in
    // application-wide cache to avoid rebuilding the parser data on every request. The LanguageData is immutable, except
    // this one case - the expected sets are constructed late by CoreParser on the when-needed basis.
    // We don't do any locking here, just compute the set and on return from this function the state field is assigned.
    // We assume that this field assignment is an atomic, concurrency-safe operation. The worst thing that might happen
    // is "double-effort" when two threads start computing the same set around the same time, and the last one to finish would
    // leave its result in the state field.

    #endregion

    /// <summary>
    /// Builds the reportable "expected" set for <paramref name="state"/>: drops terminals in
    /// DoNotReport groups, replaces terminals covered by Normal/Operator groups with the group alias,
    /// and lists the remaining terminals by their error alias.
    /// </summary>
    internal static StringSet ComputeGroupedExpectedSetForState(Grammar grammar, ParserState state)
    {
        var terms = new TerminalSet();
        terms.UnionWith(state.ExpectedTerminals);
        var result = new StringSet();
        //Eliminate no-report terminals
        foreach (var group in grammar.TermReportGroups)
            if (group.GroupType == TermReportGroupType.DoNotReport)
                terms.ExceptWith(group.Terminals);
        //Add normal and operator groups
        foreach (var group in grammar.TermReportGroups)
            if ((group.GroupType == TermReportGroupType.Normal || group.GroupType == TermReportGroupType.Operator) &&
                terms.Overlaps(group.Terminals))
            {
                result.Add(group.Alias);
                terms.ExceptWith(group.Terminals);
            }
        //Add remaining terminals "as is"
        foreach (var terminal in terms)
            result.Add(terminal.ErrorAlias);
        return result;
    }

    #endregion

    /// <summary>
    /// Returns the context to its initial parsing state: clears stacks, trace, buffers, values and
    /// the error flag, pushes a node for the parser's initial state and resets all token filters.
    /// </summary>
    internal void Reset()
    {
        CurrentParserState = Parser.InitialState;
        CurrentParserInput = null;
        CurrentCommentTokens = new TokenList();
        ParserStack.Clear();
        HasErrors = false;
        ParserStack.Push(new ParseTreeNode(CurrentParserState));
        CurrentParseTree = null;
        OpenBraces.Clear();
        ParserTrace.Clear();
        CurrentTerminals.Clear();
        CurrentToken = null;
        PreviousToken = null;
        PreviousLineStart = new SourceLocation(0, -1, 0);
        BufferedTokens.Clear();
        PreviewTokens.Clear();
        Values.Clear();
        foreach (var filter in TokenFilters)
            filter.Reset();
    }

    /// <summary>Notifies every token filter of the new location, then moves the source stream to it.</summary>
    public void SetSourceLocation(SourceLocation location)
    {
        foreach (var filter in TokenFilters)
            filter.OnSetSourceLocation(location);
        Source.Location = location;
    }

    /// <summary>
    /// Computes the source span covered by the top <paramref name="nodeCount"/> nodes of the parser stack.
    /// For <c>nodeCount == 0</c> returns a zero-length span at the current input location, or at the
    /// source location when there is no current input.
    /// </summary>
    public SourceSpan ComputeStackRangeSpan(int nodeCount)
    {
        if (nodeCount == 0)
        {
            // Same fallback as AddParserError: CurrentParserInput may be null (Reset sets it to null).
            var location = CurrentParserInput == null ? Source.Location : CurrentParserInput.Span.Location;
            return new SourceSpan(location, 0);
        }
        var first = ParserStack[ParserStack.Count - nodeCount];
        var last = ParserStack.Top;
        return new SourceSpan(first.Span.Location, last.Span.EndPosition - first.Span.Location.Position);
    }

    #region Expected term set computations

    /// <summary>
    /// Returns the expected-term set for the current parser state (empty when there is no state),
    /// computing and caching the state's reported set on first use and then removing closing braces
    /// that do not match the most recently opened brace.
    /// </summary>
    public StringSet GetExpectedTermSet()
    {
        if (CurrentParserState == null)
            return new StringSet();
        //See the note about multi-threading in the comments before ComputeGroupedExpectedSetForState.
        if (CurrentParserState.ReportedExpectedSet == null)
            CurrentParserState.ReportedExpectedSet = ParserDataBuilder.ComputeGroupedExpectedSetForState(Language.Grammar, CurrentParserState);
        //Filter out closing braces which are not expected based on previous input.
        // While the closing parenthesis ")" might be expected term in a state in general,
        // if there was no opening parenthesis in preceding input then we would not
        // expect a closing one.
        var expectedSet = FilterBracesInExpectedSet(CurrentParserState.ReportedExpectedSet);
        return expectedSet;
    }

    // Returns a copy of stateExpectedSet in which the only closing brace left is the one pairing
    // with the brace on top of OpenBraces (none at all when no brace is open).
    private StringSet FilterBracesInExpectedSet(StringSet stateExpectedSet)
    {
        var result = new StringSet();
        result.UnionWith(stateExpectedSet);
        //Find what brace we expect
        var nextClosingBrace = string.Empty;
        if (OpenBraces.Count > 0)
        {
            var lastOpenBraceTerm = OpenBraces.Peek().KeyTerm;
            var nextClosingBraceTerm = lastOpenBraceTerm.IsPairFor as KeyTerm;
            if (nextClosingBraceTerm != null)
                nextClosingBrace = nextClosingBraceTerm.Text;
        }
        //Now check all closing braces in result set, and leave only nextClosingBrace
        foreach (var term in Language.Grammar.KeyTerms.Values)
        {
            if (term.Flags.IsSet(TermFlags.IsCloseBrace))
            {
                var brace = term.Text;
                if (result.Contains(brace) && brace != nextClosingBrace)
                    result.Remove(brace);
            }
        }//foreach term
        return result;
    }

    #endregion
}//class
// Packs/unpacks the scanner-state int used for VS integration (line-by-line scanning).
// When a Terminal produces an incomplete token it sets this state to a non-zero value; the value
// identifies the terminal that will continue scanning when it resumes, plus that terminal's
// internal state when one terminal has several kinds of multi-line tokens
// (for example, several kinds of string literal, as in Python).
// All fields overlay the same four bytes: Value covers offsets 0-3, the others are views into it.
[StructLayout(LayoutKind.Explicit)]
public struct VsScannerStateMap
{
    [FieldOffset(0)] public int Value;

    [FieldOffset(0)] public byte TerminalIndex; //1-based index of active multiline term in MultilineTerminals

    [FieldOffset(1)] public byte TokenSubType; //terminal subtype (used in StringLiteral to identify string kind)

    [FieldOffset(2)] public short TerminalFlags; //Terminal flags
}//struct

/// <summary>Base event arguments exposing the parsing context.</summary>
public class ParsingEventArgs : EventArgs
{
    public readonly ParsingContext Context;

    public ParsingEventArgs(ParsingContext context) => Context = context;
}

/// <summary>Event arguments for a reduce step: the production reduced and the resulting node.</summary>
public class ReducedEventArgs : ParsingEventArgs
{
    public readonly Production ReducedProduction;
    public readonly ParseTreeNode ResultNode;

    public ReducedEventArgs(ParsingContext context, Production reducedProduction, ParseTreeNode resultNode)
        : base(context)
    {
        ResultNode = resultNode;
        ReducedProduction = reducedProduction;
    }
}

/// <summary>
/// Event arguments for token validation; every member reads or writes
/// <c>Context.CurrentToken</c>.
/// </summary>
public class ValidateTokenEventArgs : ParsingEventArgs
{
    public ValidateTokenEventArgs(ParsingContext context)
        : base(context)
    {
    }

    /// <summary>The context's current token.</summary>
    public Token Token => Context.CurrentToken;

    /// <summary>Substitutes <paramref name="token"/> for the context's current token.</summary>
    public void ReplaceToken(Token token) => Context.CurrentToken = token;

    /// <summary>Replaces the current token with an error token built from the message.</summary>
    public void SetError(string errorMessage, params object[] messageArgs) =>
        Context.CurrentToken = Context.CreateErrorToken(errorMessage, messageArgs);

    //Rejects the token; use it when there's more than one terminal that can be used to scan the input and ValidateToken is used
    // to help Scanner make the decision. Once the token is rejected, the scanner will move to the next Terminal (with lower priority)
    // and will try to produce token.
    public void RejectToken() => Context.CurrentToken = null;
}//class
public enum PreferredActionType
{
    Shift,
    Reduce,
}

/// <summary>
/// A parser action that picks one of several actions at parse time: the first entry whose condition
/// returns true is executed; when none matches, <see cref="DefaultAction"/> is executed.
/// </summary>
public class ConditionalParserAction : ParserAction
{
    #region embedded types

    /// <summary>Predicate evaluated against the parsing context to select an entry.</summary>
    public delegate bool ConditionChecker(ParsingContext context);

    /// <summary>A condition paired with the action to run when the condition holds.</summary>
    public class ConditionalEntry
    {
        public ConditionChecker Condition;
        public ParserAction Action;
        public string Description; //for tracing

        public ConditionalEntry(ConditionChecker condition, ParserAction action, string description)
        {
            Condition = condition;
            Action = action;
            Description = description;
        }

        public override string ToString()
        {
            return Description + "; action: " + Action.ToString();
        }
    }

    // The element type must be spelled out: there is no non-generic List type to derive from,
    // and Execute reads Condition/Action/Description from the elements.
    public class ConditionalEntryList : List<ConditionalEntry>
    { }

    #endregion

    public ConditionalEntryList ConditionalEntries = new();
    public ParserAction DefaultAction;

    /// <summary>
    /// Evaluates the entries in order and executes the action of the first one whose condition is true.
    /// Falls back to <see cref="DefaultAction"/>; when that is null, reports a parser error and asks the
    /// parser to recover.
    /// </summary>
    public override void Execute(ParsingContext context)
    {
        var traceEnabled = context.TracingEnabled;
        if (traceEnabled) context.AddTrace("Conditional Parser Action.");
        for (int i = 0; i < ConditionalEntries.Count; i++)
        {
            var ce = ConditionalEntries[i];
            if (traceEnabled) context.AddTrace("  Checking condition: " + ce.Description);
            if (ce.Condition(context))
            {
                if (traceEnabled) context.AddTrace("  Condition is TRUE, executing action: " + ce.Action.ToString());
                ce.Action.Execute(context);
                return;
            }
        }
        //if no conditions matched, execute default action
        if (DefaultAction == null)
        {
            context.AddParserError("Fatal parser error: no conditions matched in conditional parser action, and default action is null." + " State: {0}", context.CurrentParserState.Name);
            context.Parser.RecoverFromError();
            return;
        }
        if (traceEnabled) context.AddTrace("  All conditions failed, executing default action: " + DefaultAction.ToString());
        DefaultAction.Execute(context);
    }//method
}//class
//These two delegates define custom methods that Grammar can implement to execute custom action
public delegate void PreviewActionMethod(CustomParserAction action);

public delegate void ExecuteActionMethod(ParsingContext context, CustomParserAction action);

/// <summary>
/// Grammar hint that installs a <see cref="CustomParserAction"/> into the owner item's state, so the
/// grammar's own callback decides what the parser does in that state.
/// </summary>
public class CustomActionHint : GrammarHint
{
    private ExecuteActionMethod _executeMethod;
    private PreviewActionMethod _previewMethod;

    /// <param name="executeMethod">Callback stored in the created action.</param>
    /// <param name="previewMethod">Optional callback invoked once with the action right after it is created.</param>
    public CustomActionHint(ExecuteActionMethod executeMethod, PreviewActionMethod previewMethod = null)
    {
        _previewMethod = previewMethod;
        _executeMethod = executeMethod;
    }

    /// <summary>
    /// Creates the custom action and registers it: as the state's default action when the state is
    /// adequate, otherwise under the item's current term (shift item) or under every lookahead of the
    /// item. All conflicts of the state are then cleared.
    /// </summary>
    public override void Apply(LanguageData language, LRItem owner)
    {
        var targetState = owner.State;
        var customAction = new CustomParserAction(language, targetState, _executeMethod);
        if (_previewMethod != null)
        {
            _previewMethod(customAction);
        }

        if (!targetState.BuilderData.IsInadequate)
        {
            // adequate state, with a single possible action which is DefaultAction
            targetState.DefaultAction = customAction;
        }
        else if (owner.Core.Current != null)
        {
            // shift action
            targetState.Actions[owner.Core.Current] = customAction;
        }
        else
        {
            foreach (var lookahead in owner.Lookaheads)
            {
                targetState.Actions[lookahead] = customAction;
            }
        }

        //We consider all conflicts handled by the action
        targetState.BuilderData.Conflicts.Clear();
    }//method
}//Hint class
// CustomParserAction is in fact action selector: it allows custom Grammar code to select the action to execute from a set of
// shift/reduce actions available in this state.
public class CustomParserAction : ParserAction
{
    public LanguageData Language;
    public ParserState State;
    public ExecuteActionMethod ExecuteRef;
    public TerminalSet Conflicts = new TerminalSet();

    // Element types restored from how the constructor fills the lists (ShiftParserAction instances and
    // the result of ReduceParserAction.Create).
    // NOTE(review): assumes ReduceParserAction.Create returns ReduceParserAction - confirm.
    public IList<ShiftParserAction> ShiftActions = new List<ShiftParserAction>();
    public IList<ReduceParserAction> ReduceActions = new List<ReduceParserAction>();
    public object CustomData;

    /// <summary>
    /// Captures the state's conflicts and pre-builds one shift action per shift item and one reduce
    /// action per reduce item of the state.
    /// </summary>
    public CustomParserAction(LanguageData language, ParserState state,
                              ExecuteActionMethod executeRef)
    {
        Language = language;
        State = state;
        ExecuteRef = executeRef;
        Conflicts.UnionWith(state.BuilderData.Conflicts);
        // Create default shift and reduce actions
        foreach (var shiftItem in state.BuilderData.ShiftItems)
            ShiftActions.Add(new ShiftParserAction(shiftItem));
        foreach (var item in state.BuilderData.ReduceItems)
            ReduceActions.Add(ReduceParserAction.Create(item.Core.Production));
    }

    /// <summary>
    /// Reads input if none is current, invokes the custom callback, and reports an error (then asks the
    /// parser to recover) when neither the parser state nor the input changed.
    /// </summary>
    public override void Execute(ParsingContext context)
    {
        if (context.TracingEnabled)
            context.AddTrace(Resources.MsgTraceExecCustomAction);
        //States with DefaultAction do NOT read input, so we read it here
        if (context.CurrentParserInput == null)
            context.Parser.ReadInput();
        // Remember old state and input; if they don't change after custom action - it is error, we may fall into an endless loop
        var oldState = context.CurrentParserState;
        var oldInput = context.CurrentParserInput;
        ExecuteRef(context, this);
        //Prevent from falling into an infinite loop
        if (context.CurrentParserState == oldState && context.CurrentParserInput == oldInput)
        {
            context.AddParserError(Resources.MsgErrorCustomActionDidNotAdvance);
            context.Parser.RecoverFromError();
        }
    }//method

    public override string ToString()
    {
        return "CustomParserAction";
    }
}//class
//Note: This is an incomplete implementation.
// It sets precedence only on operator symbols that are already "shifted" into the parser stack,
// i.e. those on the "left" of a precedence comparison. It does not set precedence when an operator symbol
// first appears in parser input. This works OK for unary operators but might break some advanced scenarios.

/// <summary>
/// Grammar hint that assigns a fixed precedence and associativity to the parser input
/// when the owner item's current term is shifted in a state marked by this hint.
/// </summary>
public class ImpliedPrecedenceHint : GrammarHint
{
    // a flag to mark a state for setting implied precedence
    public const int ImpliedPrecedenceCustomFlag = 0x01000000;

    //GrammarHint inherits Precedence and Associativity members from BnfTerm; we'll use them to store implied values for this hint

    public ImpliedPrecedenceHint(int precedence, Associativity associativity)
    {
        Associativity = associativity;
        Precedence = precedence;
    }

    /// <summary>
    /// For a non-final item: flags the item's state and subscribes to the current term's
    /// Shifting event. Final items are ignored because precedence can only be implied when shifting.
    /// </summary>
    public override void Apply(LanguageData language, LRItem owner)
    {
        var currentTerm = owner.Core.Current;
        if (currentTerm != null)
        {
            //mark the state, so the Shifting handler acts only in the appropriate states
            owner.State.CustomFlags |= ImpliedPrecedenceCustomFlag;
            currentTerm.Shifting += TermShifting;
        }
    }

    // Shifting handler: copies this hint's associativity and precedence onto the current parser input,
    // but only when the current parser state carries the marker flag.
    private void TermShifting(object sender, ParsingEventArgs e)
    {
        var context = e.Context;
        if (context.CurrentParserState.CustomFlagIsSet(ImpliedPrecedenceCustomFlag))
        {
            context.CurrentParserInput.Associativity = Associativity;
            context.CurrentParserInput.Precedence = Precedence;
        }
    }
}//class
/// <summary>
/// Conditional action resolving a shift/reduce conflict by operator precedence: it holds a single
/// conditional entry that reduces when the precedence comparison says so, and shifts by default.
/// </summary>
public class PrecedenceBasedParserAction : ConditionalParserAction
{
    private ShiftParserAction _shiftAction;
    private ReduceParserAction _reduceAction;

    public PrecedenceBasedParserAction(BnfTerm shiftTerm, ParserState newShiftState, Production reduceProduction)
    {
        _reduceAction = new ReduceParserAction(reduceProduction);
        ConditionalEntries.Add(new ConditionalEntry(CheckMustReduce, _reduceAction, "(Precedence comparison)"));
        _shiftAction = new ShiftParserAction(shiftTerm, newShiftState);
        DefaultAction = _shiftAction;
    }

    // Looks at the top stack nodes - as many as the reduce production has right-hand-side terms -
    // from the top down, and decides on the first node that carries a precedence:
    //   equal precedence  -> reduce only when the input is left-associative;
    //   otherwise         -> reduce only when the stack node's precedence is higher.
    // Returns false (shift) when no such node is found.
    private bool CheckMustReduce(ParsingContext context)
    {
        var input = context.CurrentParserInput;
        var stackCount = context.ParserStack.Count;
        var prodLength = _reduceAction.Production.RValues.Count;
        var depth = 1;
        while (depth <= prodLength)
        {
            var candidate = context.ParserStack[stackCount - depth];
            depth++;
            if (candidate == null || candidate.Precedence == BnfTerm.NoPrecedence)
            {
                continue;
            }

            //same precedence: associativity decides; otherwise the higher precedence wins
            return candidate.Precedence == input.Precedence
                ? input.Associativity == Associativity.Left
                : candidate.Precedence > input.Precedence;
        }

        //If no operators found on the stack, do shift
        return false;
    }

    public override string ToString()
    {
        var shiftStateName = _shiftAction.NewState.Name;
        var production = _reduceAction.Production.ToStringQuoted();
        return string.Format(Resources.LabelActionOp, shiftStateName, production);
    }
}//class
a/src/Irony/Parsing/Parsers/SpecialActionsHints/PrecedenceBasedParserAction.cs.rej b/src/Irony/Parsing/Parsers/SpecialActionsHints/PrecedenceBasedParserAction.cs.rej new file mode 100644 index 0000000..fcd807a --- /dev/null +++ b/src/Irony/Parsing/Parsers/SpecialActionsHints/PrecedenceBasedParserAction.cs.rej @@ -0,0 +1,27 @@ +--- PrecedenceBasedParserAction.cs ++++ PrecedenceBasedParserAction.cs +@@ -28,6 +28,24 @@ + base.DefaultAction = _shiftAction = new ShiftParserAction(shiftTerm, newShiftState); + } + ++ private bool CheckMustReduce(ParsingContext context) { ++ var input = context.CurrentParserInput; ++ var stackCount = context.ParserStack.Count; ++ var prodLength = _reduceAction.Production.RValues.Count; ++ for (int i = 1; i <= prodLength; i++) { ++ var prevNode = context.ParserStack[stackCount - i]; ++ if (prevNode == null) continue; ++ if (prevNode.Precedence == BnfTerm.NoPrecedence) continue; ++ //if previous operator has the same precedence then use associativity ++ if (prevNode.Precedence == input.Precedence) ++ return (input.Associativity == Associativity.Left); //if true then Reduce ++ else ++ return (prevNode.Precedence > input.Precedence); //if true then Reduce ++ } ++ //If no operators found on the stack, do shift ++ return false; ++ } ++/* + private static bool CheckMustReduce(ParsingContext context) { + var input = context.CurrentParserInput; + for (int i = context.ParserStack.Count - 1; i >= 0; i--) { diff --git a/src/Irony/Parsing/Parsers/SpecialActionsHints/PrecedenceHint.cs b/src/Irony/Parsing/Parsers/SpecialActionsHints/PrecedenceHint.cs new file mode 100644 index 0000000..e8d6a49 --- /dev/null +++ b/src/Irony/Parsing/Parsers/SpecialActionsHints/PrecedenceHint.cs @@ -0,0 +1,52 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. 
/// <summary>A hint to use precedence.</summary>
/// <remarks>
/// Not used directly in grammars; injected automatically by system in states having conflicts on operator symbols.
/// The purpose of the hint is make handling precedence similar to other conflict resolution methods - through hints
/// activated during parser construction. The hint code analyzes the conflict and resolves it by adding custom or general action
/// for a conflicting input.
/// </remarks>
public class PrecedenceHint : GrammarHint
{
    /// <summary>
    /// For every operator terminal in the state's conflict set that has a shift target state and exactly
    /// one reduce item on that lookahead, installs a <see cref="PrecedenceBasedParserAction"/> and
    /// removes the terminal from the conflict set. Other conflicts are left untouched.
    /// </summary>
    public override void Apply(LanguageData language, LRItem owner)
    {
        var state = owner.State;
        var allConflicts = state.BuilderData.Conflicts;
        if (allConflicts.Count == 0)
            return;
        //Find all conflicts that can be resolved by operator precedence.
        // The result is materialized up front: the loop below removes entries from allConflicts, and a
        // deferred query would still be enumerating that same collection while it is being modified.
        var operConflicts = allConflicts
            .Where(c => c.Flags.IsSet(TermFlags.IsOperator))
            .ToList();
        foreach (var conflict in operConflicts)
        {
            var newState = state.BuilderData.GetNextState(conflict);
            var reduceItems = state.BuilderData.ReduceItems.SelectByLookahead(conflict).ToList();
            if (newState == null || reduceItems.Count != 1)
                continue; // this cannot be fixed by precedence
            state.Actions[conflict] = new PrecedenceBasedParserAction(conflict, newState, reduceItems[0].Core.Production);
            allConflicts.Remove(conflict);
        }//foreach conflict
    }
}//class
/// <summary>
/// Grammar hint that resolves conflicts in the owner item's state by always preferring the
/// configured action type (shift or reduce).
/// </summary>
public class PreferredActionHint : GrammarHint
{
    private PreferredActionType ActionType;

    public PreferredActionHint(PreferredActionType actionType)
    {
        ActionType = actionType;
    }

    /// <summary>
    /// Shift: when the item's current term is a terminal in conflict, installs a shift action for it
    /// and removes it from the conflict set.
    /// Reduce: for a final item, installs one shared reduce action under every lookahead that is in
    /// conflict and removes those lookaheads from the conflict set.
    /// Does nothing when the state has no conflicts.
    /// </summary>
    public override void Apply(LanguageData language, LRItem owner)
    {
        var state = owner.State;
        var conflicts = state.BuilderData.Conflicts;
        if (conflicts.Count == 0) return;
        switch (ActionType)
        {
            case PreferredActionType.Shift:
                if (owner.Core.Current is not Terminal currTerm || !conflicts.Contains(currTerm)) return; //nothing to do
                //Current term for shift item (hint owner) is a conflict - resolve it with shift action.
                // (The shift action is built from the item itself; no separate target-state lookup is needed.)
                state.Actions[currTerm] = new ShiftParserAction(owner);
                conflicts.Remove(currTerm);
                return;

            case PreferredActionType.Reduce:
                if (!owner.Core.IsFinal) return; //we take care of reduce items only here
                //we have a reduce item with "Reduce" hint. Check if any of lookaheads are in conflict
                ReduceParserAction reduceAction = null;
                foreach (var lkhead in owner.Lookaheads)
                {
                    if (!conflicts.Contains(lkhead))
                        continue;
                    // created lazily so no action is allocated when no lookahead is in conflict
                    reduceAction ??= new ReduceParserAction(owner.Core.Production);
                    state.Actions[lkhead] = reduceAction;
                    conflicts.Remove(lkhead);
                }

                return;

            default:
                break;
        }//switch
    }//method
}//class
#region License

/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution.
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/

#endregion

using Sanchime.Irony.Parsing.Parsers.ParserActions;

// Original implementation is contributed by Alexey Yakovlev (yallie)

namespace Sanchime.Irony.Parsing.Parsers.SpecialActionsHints
{
    using ConditionalEntry = ConditionalParserAction.ConditionalEntry;

    /// <summary>
    /// Grammar hint that installs a conditional parser action: at parse time the scanner previews the
    /// upcoming tokens and the preferred action is taken only if the "first" terminal shows up before
    /// any of the "comes before" terminals.
    /// </summary>
    public class TokenPreviewHint : GrammarHint
    {
        /// <summary>Upper bound on previewed tokens; a value of zero or less disables the limit.</summary>
        public int MaxPreviewTokens = 1000;

        private PreferredActionType _actionType;
        private string _firstString;
        private StringSet _beforeStrings = new();
        private Terminal _firstTerminal;
        private TerminalSet _beforeTerminals = new();
        private string _description;

        /// <summary>Creates a hint from symbol strings; they are converted to terminals in <see cref="Init"/>.</summary>
        public TokenPreviewHint(PreferredActionType actionType, string thisSymbol, params string[] comesBefore)
        {
            _actionType = actionType;
            _firstString = thisSymbol;
            _beforeStrings.AddRange(comesBefore);
        }

        /// <summary>Creates a hint from already-built terminals.</summary>
        public TokenPreviewHint(PreferredActionType actionType, Terminal thisTerm, params Terminal[] comesBefore)
        {
            _actionType = actionType;
            _firstTerminal = thisTerm;
            _beforeTerminals.UnionWith(comesBefore);
        }

        /// <summary>Resolves string symbols into terminals and builds the human-readable description.</summary>
        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);

            // Convert strings to terminals, if needed.
            if (_firstTerminal is null)
            {
                _firstTerminal = Grammar.ToTerm(_firstString);
            }

            foreach (var symbol in _beforeStrings)
            {
                _beforeTerminals.Add(Grammar.ToTerm(symbol));
            }

            var beforeNames = _beforeTerminals.Select(t => t.Name);
            _description = string.Format("{0} if {1} comes before {2}.", _actionType, _firstTerminal.Name, string.Join(" ", beforeNames));
        }

        /// <summary>Returns the description; before <see cref="Init"/> has run a placeholder is produced and cached.</summary>
        public override string ToString()
        {
            return _description ??= _actionType.ToString() + " if ...";
        }

        /// <summary>
        /// Adds a conditional entry for this hint to the actions of the owner's state and marks the
        /// affected lookaheads as no longer conflicting.
        /// </summary>
        public override void Apply(LanguageData language, LRItem owner)
        {
            var state = owner.State;
            if (!state.BuilderData.IsInadequate)
            {
                return; // the state is adequate, nothing to do
            }

            // Lookaheads are removed from the conflict set so the parser builder knows they are handled.
            // This method may run several times for one state (one call per hinted LR item); on later calls
            // the lookahead is no longer listed as a conflict, yet a conditional entry must still be added.
            // That is why there is deliberately no "conflicts.Count == 0" shortcut here.
            var conflicts = state.BuilderData.Conflicts;

            if (_actionType == PreferredActionType.Reduce)
            {
                if (!owner.Core.IsFinal)
                {
                    return;
                }

                var lookaheads = owner.Lookaheads;
                if (lookaheads.Count == 0)
                {
                    return; // no lookaheads, nothing to do
                }

                var entry = new ConditionalEntry(CheckCondition, new ReduceParserAction(owner.Core.Production), _description);
                foreach (var lookahead in lookaheads)
                {
                    AddConditionalEntry(state, lookahead, entry);
                    conflicts.Remove(lookahead);
                }
            }
            else if (_actionType == PreferredActionType.Shift)
            {
                // Either a reduce item, or the current term is a NonTerminal - it cannot be shifted.
                if (owner.Core.Current is not Terminal shiftTerm)
                {
                    return;
                }

                var entry = new ConditionalEntry(CheckCondition, new ShiftParserAction(owner), _description);
                AddConditionalEntry(state, shiftTerm, entry);
                conflicts.Remove(shiftTerm);
            }
        }

        // Previews tokens until the first terminal (true), one of the "before" terminals (false),
        // EOF / end of stream (false) or the preview limit (false) is reached.
        private bool CheckCondition(ParsingContext context)
        {
            var scanner = context.Parser.Scanner;
            try
            {
                var eof = Grammar.Eof;
                var previewed = 0;
                scanner.BeginPreview();
                for (var token = scanner.GetToken(); token != null && token.Terminal != eof; token = scanner.GetToken())
                {
                    if (token.Terminal == _firstTerminal)
                    {
                        return true; // found!
                    }

                    if (_beforeTerminals.Contains(token.Terminal))
                    {
                        return false;
                    }

                    previewed++;
                    if (previewed > MaxPreviewTokens && MaxPreviewTokens > 0)
                    {
                        return false;
                    }
                }

                return false;
            }
            finally
            {
                scanner.EndPreview(true);
            }
        }

        // If the state already has a conditional action for the term, the entry is appended to it.
        // Otherwise a new conditional action is created (wrapping the old action, if any, as its default).
        private void AddConditionalEntry(ParserState state, BnfTerm term, ConditionalEntry entry)
        {
            state.Actions.TryGetValue(term, out ParserAction existing);
            if (existing is not ConditionalParserAction conditional)
            {
                conditional = new ConditionalParserAction
                {
                    DefaultAction = existing
                };
                state.Actions[term] = conditional;
            }

            conditional.ConditionalEntries.Add(entry);

            // No default yet: look for an un-hinted item; if that fails too, fall back to the entry's own action.
            conditional.DefaultAction ??= FindDefaultAction(state, term) ?? entry.Action;
        }

        // Finds an LR item without hints that is compatible with the term (reduce with the term as lookahead,
        // or shift on the term). Such an item becomes the default, used when all hint conditions fail.
        // Returns null when no such item exists.
        private ParserAction FindDefaultAction(ParserState state, BnfTerm term)
        {
            // Reduce items are checked first.
            var unhintedReduce = state.BuilderData.ReduceItems
                .SelectByLookahead(term as Terminal)
                .FirstOrDefault(item => item.Core.Hints.Count == 0);
            if (unhintedReduce is not null)
            {
                return ReduceParserAction.Create(unhintedReduce.Core.Production);
            }

            var shiftItem = state.BuilderData.ShiftItems.SelectByCurrent(term).FirstOrDefault();
            return shiftItem is null ? null : new ShiftParserAction(shiftItem);
        }
    }//class
}
#region License

/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution.
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/

#endregion

namespace Sanchime.Irony.Parsing.Parsers
{
    /// <summary>Container for a single syntax error: where it was found, its message and the parser state.</summary>
    public class SyntaxError
    {
        public SyntaxError(SourceLocation location, string message, ParserState parserState)
        {
            Location = location;
            Message = message;
            ParserState = parserState;
        }

        public readonly SourceLocation Location;
        public readonly string Message;
        public ParserState ParserState;

        /// <summary>Returns the error message.</summary>
        public override string ToString()
        {
            return Message;
        }
    }//class

    /// <summary>List of syntax errors.</summary>
    public class SyntaxErrorList : List<SyntaxError>
    {
        /// <summary>Comparison that orders errors by source location; usable with <c>List.Sort</c>.</summary>
        public static int ByLocation(SyntaxError x, SyntaxError y)
        {
            return SourceLocation.Compare(x.Location, y.Location);
        }
    }
}//namespace
#region License

/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution.
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/

#endregion

using Sanchime.Irony.Parsing.TokenFilters;

// ---- Scanner.cs ----
namespace Sanchime.Irony.Parsing.Scanners
{
    /// <summary>
    /// Transforms a stream of characters into tokens (identifiers, numbers, literals, etc.),
    /// optionally passing them through the grammar's token filters.
    /// </summary>
    public class Scanner
    {
        #region Properties and Fields: Data, _source

        public readonly ScannerData Data;
        public readonly Parser Parser;
        private readonly Grammar _grammar;

        private ParsingContext Context
        {
            get { return Parser.Context; }
        }

        #endregion

        /// <summary>Builds the token pipeline: raw token iterator chained through every token filter.</summary>
        public Scanner(Parser parser)
        {
            Parser = parser;
            Data = parser.Language.ScannerData;
            _grammar = parser.Language.Grammar;
            //create token streams
            var tokenStream = GetUnfilteredTokens();
            //chain all token filters
            Context.TokenFilters.Clear();
            _grammar.CreateTokenFilters(Data.Language, Context.TokenFilters);
            foreach (TokenFilter filter in Context.TokenFilters)
            {
                tokenStream = filter.BeginFiltering(Context, tokenStream);
            }
            Context.FilteredTokens = tokenStream.GetEnumerator();
        }

        internal void Reset()
        {
        }

        /// <summary>
        /// Pulls the next token from the filtered pipeline. In preview mode the token is recorded in the
        /// preview stack, otherwise it is appended to the current parse tree's token list.
        /// Returns null when the pipeline is exhausted.
        /// </summary>
        public Token GetToken()
        {
            //get new token from pipeline
            if (!Context.FilteredTokens.MoveNext())
            {
                return null;
            }
            var token = Context.FilteredTokens.Current;
            if (Context.Status == ParserStatus.Previewing)
            {
                Context.PreviewTokens.Push(token);
            }
            else
            {
                Context.CurrentParseTree.Tokens.Add(token);
            }
            return token;
        }

        //This is an iterator method, so it returns immediately when called directly;
        // it returns the unfiltered, "raw" token stream.
        private IEnumerable<Token> GetUnfilteredTokens()
        {
            //We don't do "while(!_source.EOF())..." because on EOF() we need to continue and produce EOF token
            while (true)
            {
                Context.PreviousToken = Context.CurrentToken;
                Context.CurrentToken = null;
                NextToken();
                Context.OnTokenCreated();
                yield return Context.CurrentToken;
                //Don't yield break, continue returning EOF
            }//while
        }// method

        #region Scanning tokens

        private void NextToken()
        {
            //1. Check if there are buffered tokens
            if (Context.BufferedTokens.Count > 0)
            {
                Context.CurrentToken = Context.BufferedTokens.Pop();
                return;
            }
            //2. Skip whitespace.
            _grammar.SkipWhitespace(Context.Source);
            //3. That's the token start, calc location (line and column)
            Context.Source.Position = Context.Source.PreviewPosition;
            //4. Check for EOF
            if (Context.Source.EOF())
            {
                Context.CurrentToken = new Token(_grammar.Eof, Context.Source.Location, string.Empty, _grammar.Eof.Name);
                return;
            }
            //5. Actually scan the source text and construct a new token
            ScanToken();
        }//method

        //Scans the source text and constructs a new token
        private void ScanToken()
        {
            if (!MatchNonGrammarTerminals() && !MatchRegularTerminals())
            {
                //we are in error already; try to match ANY terminal and let the parser report an error
                MatchAllTerminals(); //try to match any terminal out there
            }
            var token = Context.CurrentToken;
            //If we have normal token then return it
            if (token != null && !token.IsError())
            {
                var src = Context.Source;
                //set position to point after the result token
                src.PreviewPosition = src.Position + token.Length;
                src.Position = src.PreviewPosition;
                return;
            }
            //we have an error: either error token or no token at all
            if (token == null) //if no token then create error token
            {
                Context.CurrentToken = Context.CreateErrorToken(Resources.ErrInvalidChar, Context.Source.PreviewChar);
            }
            Recover();
        }

        private bool MatchNonGrammarTerminals()
        {
            if (!Data.NonGrammarTerminalsLookup.TryGetValue(Context.Source.PreviewChar, out TerminalList terms))
            {
                return false;
            }
            foreach (var term in terms)
            {
                Context.Source.PreviewPosition = Context.Source.Location.Position;
                Context.CurrentToken = term.TryMatch(Context, Context.Source);
                if (Context.CurrentToken != null)
                {
                    term.OnValidateToken(Context);
                }
                // Checked again on purpose: validation works on the context and may have changed the current token.
                if (Context.CurrentToken != null)
                {
                    //check if we need to fire LineStart token before this token;
                    // we do it only if the token is not a comment; comments should be ignored by the outline logic
                    var token = Context.CurrentToken;
                    if (token.Category == TokenCategory.Content && NeedLineStartToken(token.Location))
                    {
                        Context.BufferedTokens.Push(token); //buffer current token; we'll eject LineStart instead
                        Context.Source.Location = token.Location; //set it back to the start of the token
                        Context.CurrentToken = Context.Source.CreateToken(_grammar.LineStartTerminal); //generate LineStart
                        Context.PreviousLineStart = Context.Source.Location; //update LineStart
                    }
                    return true;
                }//if
            }//foreach term
            Context.Source.PreviewPosition = Context.Source.Location.Position;
            return false;
        }

        private bool NeedLineStartToken(SourceLocation forLocation)
        {
            return _grammar.LanguageFlags.IsSet(LanguageFlags.EmitLineStartToken) &&
                forLocation.Line > Context.PreviousLineStart.Line;
        }

        private bool MatchRegularTerminals()
        {
            //We need to eject LineStart BEFORE we try to produce a real token; this LineStart token should reach
            // the parser, make it change the state and with it to change the set of expected tokens. So when we
            // finally move to scan the real token, the expected terminal set is correct.
            if (NeedLineStartToken(Context.Source.Location))
            {
                Context.CurrentToken = Context.Source.CreateToken(_grammar.LineStartTerminal);
                Context.PreviousLineStart = Context.Source.Location;
                return true;
            }
            //Find matching terminal
            // First, try terminals with explicit "first-char" prefixes, selected by current char in source
            ComputeCurrentTerminals();
            //If we have more than one candidate; let grammar method select
            if (Context.CurrentTerminals.Count > 1)
            {
                _grammar.OnScannerSelectTerminal(Context);
            }

            MatchTerminals();
            //If we don't have a token from terminals, try Grammar's method
            if (Context.CurrentToken == null)
            {
                Context.CurrentToken = _grammar.TryMatch(Context, Context.Source);
            }
            if (Context.CurrentToken is MultiToken)
            {
                UnpackMultiToken();
            }
            return Context.CurrentToken != null;
        }//method

        // This method is a last attempt by scanner to match ANY terminal, after regular matching (by input char) had failed.
        // Likely this will produce some token which is invalid for current parser state (for ex, identifier where a number
        // is expected); in this case the parser will report an error as "Error: expected number".
        // If this matching fails, the scanner will produce an error as "unexpected character."
        private bool MatchAllTerminals()
        {
            Context.CurrentTerminals.Clear();
            Context.CurrentTerminals.AddRange(Data.Language.GrammarData.Terminals);
            MatchTerminals();
            if (Context.CurrentToken is MultiToken)
            {
                UnpackMultiToken();
            }
            return Context.CurrentToken != null;
        }

        //If token is MultiToken then push all its child tokens into the buffered tokens and return the first token in buffer
        private void UnpackMultiToken()
        {
            if (Context.CurrentToken is not MultiToken mtoken)
            {
                return;
            }
            // Pushed in reverse so that the first child is popped first.
            for (int i = mtoken.ChildTokens.Count - 1; i >= 0; i--)
            {
                Context.BufferedTokens.Push(mtoken.ChildTokens[i]);
            }
            Context.CurrentToken = Context.BufferedTokens.Pop();
        }

        private void ComputeCurrentTerminals()
        {
            Context.CurrentTerminals.Clear();
            if (!Data.TerminalsLookup.TryGetValue(Context.Source.PreviewChar, out TerminalList termsForCurrentChar))
            {
                termsForCurrentChar = Data.NoPrefixTerminals;
            }
            //if we are recovering, previewing or there's no parser state, then return list as is
            if (Context.Status == ParserStatus.Recovering || Context.Status == ParserStatus.Previewing
                || Context.CurrentParserState == null || _grammar.LanguageFlags.IsSet(LanguageFlags.DisableScannerParserLink)
                || Context.Mode == ParseMode.VsLineScan)
            {
                Context.CurrentTerminals.AddRange(termsForCurrentChar);
                return;
            }
            // Try filtering terms by checking with parser which terms it expects;
            var parserState = Context.CurrentParserState;
            foreach (var term in termsForCurrentChar)
            {
                //Note that we check the OutputTerminal with parser, not the term itself;
                //in most cases it is the same as term, but not always
                if (parserState.ExpectedTerminals.Contains(term.OutputTerminal) || _grammar.NonGrammarTerminals.Contains(term))
                {
                    Context.CurrentTerminals.Add(term);
                }
            }
        }//method

        private void MatchTerminals()
        {
            Token priorToken = null;
            for (int i = 0; i < Context.CurrentTerminals.Count; i++)
            {
                var term = Context.CurrentTerminals[i];
                // If we have priorToken from prior term in the list, check if prior term has higher priority than this term;
                // if term.Priority is lower then we don't need to check anymore, higher priority (in prior token) wins
                // Note that terminals in the list are sorted in descending priority order
                if (priorToken != null && priorToken.Terminal.Priority > term.Priority)
                {
                    return;
                }
                //Reset source position and try to match
                Context.Source.PreviewPosition = Context.Source.Location.Position;
                var token = term.TryMatch(Context, Context.Source);
                if (token == null)
                {
                    continue;
                }
                //skip it if it is shorter than previous token
                if (priorToken != null && !priorToken.IsError() && token.Length < priorToken.Length)
                {
                    continue;
                }
                Context.CurrentToken = token; //now it becomes current token
                term.OnValidateToken(Context); //validate it
                if (Context.CurrentToken != null)
                {
                    priorToken = Context.CurrentToken;
                }
            }
        }//method

        #endregion

        #region VS Integration methods

        //Use this method for VS integration; VS language package requires scanner that returns tokens one-by-one.
        // Start and End positions required by this scanner may be derived from Token :
        //   start=token.Location.Position; end=start + token.Length;
        public Token VsReadToken(ref int state)
        {
            Context.VsLineScanState.Value = state;
            if (Context.Source.EOF())
            {
                return null;
            }
            if (state == 0)
            {
                NextToken();
            }
            else
            {
                // Non-zero state: continue matching with the multiline terminal recorded in the scan state.
                Terminal term = Data.MultilineTerminals[Context.VsLineScanState.TerminalIndex - 1];
                Context.CurrentToken = term.TryMatch(Context, Context.Source);
            }
            //set state value from context
            state = Context.VsLineScanState.Value;
            if (Context.CurrentToken != null && Context.CurrentToken.Terminal == _grammar.Eof)
            {
                return null;
            }
            return Context.CurrentToken;
        }

        public void VsSetSource(string text, int offset)
        {
            var line = Context.Source == null ? 0 : Context.Source.Location.Line;
            var newLoc = new SourceLocation(offset, line + 1, 0);
            Context.Source = new SourceStream(text, Context.Language.Grammar.CaseSensitive, Context.TabWidth, newLoc);
        }

        #endregion

        #region Error recovery

        //Simply skip until whitespace or delimiter character.
        // Returns true if such a character was found, false if the end of source was reached first.
        private bool Recover()
        {
            var src = Context.Source;
            src.PreviewPosition++;
            while (!Context.Source.EOF())
            {
                if (_grammar.IsWhitespaceOrDelimiter(src.PreviewChar))
                {
                    src.Position = src.PreviewPosition;
                    return true;
                }
                src.PreviewPosition++;
            }
            return false;
        }

        #endregion

        #region TokenPreview

        //Preview mode allows custom code in grammar to help parser decide on appropriate action in case of conflict
        // Preview process is simply searching for particular tokens in "preview set", and finding out which of the
        // tokens will come first.
        // In preview mode, tokens returned by GetToken are collected in the preview token stack; after finishing preview
        // the scanner "rolls back" to original position - either by directly restoring the position, or moving the preview
        // tokens into the buffered tokens stack, so that they will be read again by parser in normal mode.
        // See c# grammar sample for an example of using preview methods
        private SourceLocation _previewStartLocation;

        /// <summary>Switches the scanner into preview mode.</summary>
        public void BeginPreview()
        {
            Context.Status = ParserStatus.Previewing;
            _previewStartLocation = Context.Source.Location;
            Context.PreviewTokens.Clear();
        }

        /// <summary>
        /// Ends preview mode. When <paramref name="keepPreviewTokens"/> is true the previewed tokens are moved
        /// to the buffered tokens; otherwise the source location recorded by <see cref="BeginPreview"/> is restored.
        /// </summary>
        public void EndPreview(bool keepPreviewTokens)
        {
            if (keepPreviewTokens)
            {
                //insert previewed tokens into buffered list, so we don't recreate them again
                while (Context.PreviewTokens.Count > 0)
                {
                    Context.BufferedTokens.Push(Context.PreviewTokens.Pop());
                }
            }
            else
            {
                Context.SetSourceLocation(_previewStartLocation);
            }
            Context.PreviewTokens.Clear();
            Context.Status = ParserStatus.Parsing;
        }

        #endregion
    }//class
}//namespace

// ---- SourceLocation.cs ----
namespace Sanchime.Irony.Parsing.Scanners
{
    /// <summary>Position in the source text: absolute character position plus line and column.</summary>
    public struct SourceLocation
    {
        public int Position;

        /// <summary>Source line number, 0-based.</summary>
        public int Line;

        /// <summary>Source column number, 0-based.</summary>
        public int Column;

        public SourceLocation(int position, int line, int column)
        {
            Position = position;
            Line = line;
            Column = column;
        }

        //Line/col are zero-based internally; the formatted text shows them 1-based
        public override string ToString()
        {
            return string.Format(Resources.FmtRowCol, Line + 1, Column + 1);
        }

        //Line and Column displayed to user should be 1-based
        public string ToUiString()
        {
            return string.Format(Resources.FmtRowCol, Line + 1, Column + 1);
        }

        /// <summary>Compares two locations by <see cref="Position"/> only; returns -1, 0 or 1.</summary>
        public static int Compare(SourceLocation x, SourceLocation y)
        {
            if (x.Position < y.Position)
            {
                return -1;
            }
            if (x.Position == y.Position)
            {
                return 0;
            }
            return 1;
        }

        public static SourceLocation Empty
        {
            get { return _empty; }
        }

        // readonly: the shared "empty" value must never be reassigned.
        private static readonly SourceLocation _empty = new SourceLocation();

        public static SourceLocation operator +(SourceLocation x, SourceLocation y)
        {
            return new SourceLocation(x.Position + y.Position, x.Line + y.Line, x.Column + y.Column);
        }

        /// <summary>Shifts position and column by <paramref name="offset"/>; the line is left unchanged.</summary>
        public static SourceLocation operator +(SourceLocation x, int offset)
        {
            return new SourceLocation(x.Position + offset, x.Line, x.Column + offset);
        }
    }//SourceLocation

    /// <summary>A start location plus a length in characters.</summary>
    public struct SourceSpan
    {
        public readonly SourceLocation Location;
        public readonly int Length;

        public SourceSpan(SourceLocation location, int length)
        {
            Location = location;
            Length = length;
        }

        public int EndPosition
        {
            get { return Location.Position + Length; }
        }

        /// <summary>True when the position lies between the start and <see cref="EndPosition"/>, both inclusive.</summary>
        public bool InRange(int position)
        {
            return position >= Location.Position && position <= EndPosition;
        }
    }
}//namespace
#region License

/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution.
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/

#endregion

// ---- SourceStream.cs ----
namespace Sanchime.Irony.Parsing.Scanners
{
    /// <summary>
    /// In-memory source text with a token start location and a separate preview position
    /// that terminals advance while matching.
    /// </summary>
    public class SourceStream : ISourceStream
    {
        private StringComparison _stringComparison;
        private int _tabWidth;
        private char[] _chars;
        private int _textLength;

        public SourceStream(string text, bool caseSensitive, int tabWidth) : this(text, caseSensitive, tabWidth, new SourceLocation())
        {
        }

        /// <summary>Creates a stream over <paramref name="text"/> starting at <paramref name="initialLocation"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public SourceStream(string text, bool caseSensitive, int tabWidth, SourceLocation initialLocation)
        {
            _text = text ?? throw new ArgumentNullException(nameof(text));
            _textLength = _text.Length;
            _chars = _text.ToCharArray();
            _stringComparison = caseSensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;
            _tabWidth = tabWidth;
            _location = initialLocation;
            _previewPosition = _location.Position;
            // A tab width of 1 or less is replaced with 8.
            if (_tabWidth <= 1)
            {
                _tabWidth = 8;
            }
        }

        #region ISourceStream Members

        public string Text
        {
            get { return _text; }
        }

        private string _text;

        /// <summary>Position of the token start; assigning a new value recomputes line and column.</summary>
        public int Position
        {
            get { return _location.Position; }
            set
            {
                if (_location.Position != value)
                {
                    SetNewPosition(value);
                }
            }
        }

        public SourceLocation Location
        {
            [System.Diagnostics.DebuggerStepThrough]
            get { return _location; }
            set { _location = value; }
        }

        private SourceLocation _location;

        public int PreviewPosition
        {
            get { return _previewPosition; }
            set { _previewPosition = value; }
        }

        private int _previewPosition;

        /// <summary>Char at the preview position, or '\0' past the end of the text.</summary>
        public char PreviewChar
        {
            [System.Diagnostics.DebuggerStepThrough]
            get
            {
                if (_previewPosition >= _textLength)
                {
                    return '\0';
                }
                return _chars[_previewPosition];
            }
        }

        /// <summary>Char right after the preview position, or '\0' past the end of the text.</summary>
        public char NextPreviewChar
        {
            [System.Diagnostics.DebuggerStepThrough]
            get
            {
                if (_previewPosition + 1 >= _textLength)
                {
                    return '\0';
                }
                return _chars[_previewPosition + 1];
            }
        }

        /// <summary>
        /// Checks whether the text at the preview position starts with <paramref name="symbol"/>, using the
        /// stream's case sensitivity. Returns false (never throws) for a null symbol or a preview position
        /// outside the text.
        /// </summary>
        public bool MatchSymbol(string symbol)
        {
            // string.Compare throws only for a null-derived length or an index outside [0, text length];
            // those cases are rejected up front instead of being caught as exceptions.
            if (symbol is null || _previewPosition < 0 || _previewPosition > _textLength)
            {
                return false;
            }
            return string.Compare(_text, _previewPosition, symbol, 0, symbol.Length, _stringComparison) == 0;
        }

        public Token CreateToken(Terminal terminal)
        {
            var tokenText = GetPreviewText();
            return new Token(terminal, Location, tokenText, tokenText);
        }

        public Token CreateToken(Terminal terminal, object value)
        {
            var tokenText = GetPreviewText();
            return new Token(terminal, Location, tokenText, value);
        }

        [System.Diagnostics.DebuggerStepThrough]
        public bool EOF()
        {
            return _previewPosition >= _textLength;
        }

        #endregion

        //returns substring from Location.Position till (PreviewPosition - 1)
        private string GetPreviewText()
        {
            var until = _previewPosition;
            if (until > _textLength)
            {
                until = _textLength;
            }
            var p = _location.Position;
            string text = Text.Substring(p, until - p);
            return text;
        }

        // To make debugging easier: show 20 chars from current position
        public override string ToString()
        {
            string result;
            try
            {
                var p = Location.Position;
                if (p + 20 < _textLength)
                {
                    result = _text.Substring(p, 20) + Resources.LabelSrcHaveMore;// " ..."
                }
                else
                {
                    result = _text.Substring(p) + Resources.LabelEofMark; //"(EOF)"
                }
            }
            catch (Exception)
            {
                // Debug helper only: fall back to the preview char if the position is outside the text.
                result = PreviewChar + Resources.LabelSrcHaveMore;
            }
            return string.Format(Resources.MsgSrcPosToString, result, Location); //"[{0}], at {1}"
        }

        //Computes the Location info (line, col) for a new source position.
        // Moving backwards is not supported and raises InvalidOperationException.
        private void SetNewPosition(int newPosition)
        {
            if (newPosition < Position)
            {
                throw new InvalidOperationException(Resources.ErrCannotMoveBackInSource);
            }
            int p = Position;
            int col = Location.Column;
            int line = Location.Line;
            while (p < newPosition)
            {
                if (p >= _textLength)
                {
                    break;
                }
                var curr = _chars[p];
                switch (curr)
                {
                    case '\n': line++; col = 0; break;
                    case '\r': break; // carriage return does not advance the column
                    case '\t': col = (col / _tabWidth + 1) * _tabWidth; break; // jump to the next tab stop
                    default: col++; break;
                } //switch
                p++;
            }
            Location = new SourceLocation(p, line, col);
        }
    }//class
}//namespace

// ---- Token.cs (supporting types) ----
namespace Sanchime.Irony.Parsing.Scanners
{
    public enum TokenFlags
    {
        IsIncomplete = 0x01,
    }

    public enum TokenCategory
    {
        Content,
        Outline, //newLine, indent, dedent
        Comment,
        Directive,
        Error,
    }

    /// <summary>List of tokens.</summary>
    public class TokenList : List<Token>
    { }

    /// <summary>Stack of tokens.</summary>
    public class TokenStack : Stack<Token>
    { }
}//namespace
#region License

/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution.
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/

#endregion

namespace Sanchime.Irony.Parsing.Scanners
{
    /// <summary>
    /// Tokens are produced by scanner and fed to parser, optionally passing through Token filters in between.
    /// </summary>
    public partial class Token
    {
        public Terminal Terminal { get; private set; }
        public KeyTerm KeyTerm;
        public readonly SourceLocation Location;
        public readonly string Text;

        public object Value;

        /// <summary>String form of <see cref="Value"/>; empty string when the value is null.</summary>
        public string ValueString
        {
            get { return Value == null ? string.Empty : Value.ToString(); }
        }

        public object Details;
        public TokenFlags Flags;
        public TokenEditorInfo EditorInfo;

        public Token(Terminal term, SourceLocation location, string text, object value)
        {
            SetTerminal(term);
            KeyTerm = term as KeyTerm;
            Location = location;
            Text = text;
            Value = value;
        }

        /// <summary>Assigns the terminal and resets <see cref="EditorInfo"/> to the terminal's editor info.</summary>
        public void SetTerminal(Terminal terminal)
        {
            Terminal = terminal;
            EditorInfo = Terminal.EditorInfo; //set to term's EditorInfo by default
        }

        public bool IsSet(TokenFlags flag)
        {
            return (Flags & flag) != 0;
        }

        public TokenCategory Category
        {
            get { return Terminal.Category; }
        }

        public bool IsError()
        {
            return Category == TokenCategory.Error;
        }

        /// <summary>Length of <see cref="Text"/>; zero when the text is null.</summary>
        public int Length
        {
            get { return Text == null ? 0 : Text.Length; }
        }

        //matching opening/closing brace
        public Token OtherBrace;

        public short ScannerState; //Scanner state after producing token

        [System.Diagnostics.DebuggerStepThrough]
        public override string ToString()
        {
            return Terminal.TokenToString(this);
        }//method
    }//class

    /// <summary>
    /// Some terminals may need to return a bunch of tokens in one call to TryMatch;
    /// MultiToken is a container for these tokens.
    /// </summary>
    public class MultiToken : Token
    {
        public TokenList ChildTokens;

        /// <summary>
        /// Creates a multi-token that takes its terminal and location from the first child token.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="tokens"/> is empty or its first element is null.</exception>
        public MultiToken(params Token[] tokens) : this(FirstChild(tokens).Terminal, tokens[0].Location, new TokenList())
        {
            ChildTokens.AddRange(tokens);
        }

        public MultiToken(Terminal term, SourceLocation location, TokenList childTokens) : base(term, location, string.Empty, null)
        {
            ChildTokens = childTokens;
        }

        // Validates the params array before the constructor initializer dereferences its first element.
        private static Token FirstChild(Token[] tokens)
        {
            if (tokens is null)
            {
                throw new ArgumentNullException(nameof(tokens));
            }
            if (tokens.Length == 0 || tokens[0] is null)
            {
                throw new ArgumentException("At least one non-null child token is required.", nameof(tokens));
            }
            return tokens[0];
        }
    }//class
}//namespace
#region License

/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution.
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/

#endregion

namespace Sanchime.Irony.Parsing.Scanners
{
    // Helper classes for information used by syntax highlighters and editors.
    // TokenColor, TokenTriggers and TokenType are copied from the Visual Studio integration assemblies.

    /// <summary>
    /// Editor-related information of a terminal/token; usable by the VS integration package
    /// or by any editor for syntax highlighting.
    /// </summary>
    public class TokenEditorInfo
    {
        public readonly TokenType Type;
        public readonly TokenColor Color;
        public readonly TokenTriggers Triggers;
        public string ToolTip;
        public int UnderlineType;

        public TokenEditorInfo(TokenType type, TokenColor color, TokenTriggers triggers)
        {
            Type = type;
            Color = color;
            Triggers = triggers;
        }
    }//class

    public enum TokenColor
    {
        Text = 0,
        Keyword = 1,
        Comment = 2,
        Identifier = 3,
        String = 4,
        Number = 5,
    }

    /// <summary>
    /// Specifies a set of triggers that can be fired from a Microsoft.VisualStudio.Package.IScanner
    /// language parser. (Member descriptions come from the Visual Studio integration package.)
    /// </summary>
    [Flags]
    public enum TokenTriggers
    {
        /// <summary>Used when no triggers are set. This is the default.</summary>
        None = 0,

        /// <summary>
        /// A character that indicates that the start of a member selection has been parsed.
        /// In C#, this could be a period following a class name. In XML, this could be a &lt;
        /// (the member select is a list of possible tags).
        /// </summary>
        MemberSelect = 1 << 0,

        /// <summary>
        /// The opening or closing part of a language pair has been parsed. For example,
        /// in C#, a { or } has been parsed. In XML, a &lt; or &gt; has been parsed.
        /// </summary>
        MatchBraces = 1 << 1,

        /// <summary>
        /// A character that marks the start of a parameter list has been parsed. For
        /// example, in C#, this could be an open parenthesis, "(".
        /// </summary>
        ParameterStart = 1 << 4,

        /// <summary>
        /// A character that separates parameters in a list has been parsed. For example,
        /// in C#, this could be a comma, ",".
        /// </summary>
        ParameterNext = 1 << 5,

        /// <summary>
        /// A character that marks the end of a parameter list has been parsed. For example,
        /// in C#, this could be a close parenthesis, ")".
        /// </summary>
        ParameterEnd = 1 << 6,

        /// <summary>A parameter in a method's parameter list has been parsed.</summary>
        Parameter = 1 << 7,

        /// <summary>
        /// This is a mask for the flags used to govern the IntelliSense Method Tip operation.
        /// This mask is used to isolate the values Parameter, ParameterStart, ParameterNext
        /// and ParameterEnd (numeric value 240).
        /// </summary>
        MethodTip = ParameterStart | ParameterNext | ParameterEnd | Parameter,
    }

    public enum TokenType
    {
        Unknown = 0,
        Text = 1,
        Keyword = 2,
        Identifier = 3,
        String = 4,
        Literal = 5,
        Operator = 6,
        Delimiter = 7,
        WhiteSpace = 8,
        LineComment = 9,
        Comment = 10,
    }
}
+    ///
+    public interface ISourceStream
+    {
+        /// <summary>
+        /// Returns the source text
+        /// </summary>
+        string Text { get; }
+
+        /// <summary>
+        /// Gets or sets the start location (position, row, column) of the new token
+        /// </summary>
+        SourceLocation Location { get; set; }
+
+        /// <summary>
+        /// Gets or sets the current position in the source file. When reading the value, returns Location.Position value.
+        /// When a new value is assigned, the Location is modified accordingly.
+        /// </summary>
+        int Position { get; set; }
+
+        /// <summary>
+        /// Gets or sets the current preview position in the source file. Must be greater or equal to Location.Position
+        /// </summary>
+        int PreviewPosition { get; set; }
+
+        /// <summary>
+        /// Gets a char at preview position
+        /// </summary>
+        char PreviewChar { get; }
+
+        /// <summary>
+        /// Gets the char at position next after the PreviewPosition
+        /// </summary>
+        char NextPreviewChar { get; } //char at PreviewPosition+1
+
+        /// <summary>
+        /// Creates a new token based on current preview position.
+        /// </summary>
+        /// <param name="terminal">A terminal associated with the token.</param>
+        /// <returns>New token.</returns>
+        Token CreateToken(Terminal terminal);
+
+        /// <summary>
+        /// Creates a new token based on current preview position and sets its Value field.
+        /// </summary>
+        /// <param name="terminal">A terminal associated with the token.</param>
+        /// <param name="value">The value associated with the token.</param>
+        /// <returns>New token.</returns>
+        Token CreateToken(Terminal terminal, object value);
+
+        /// <summary>Tries to match the symbol with the text at current preview position.</summary>
+        /// <param name="symbol">A symbol to match</param>
+        /// <returns>True if there is a match; otherwise, false.</returns>
+        bool MatchSymbol(string symbol);
+
+        // NOTE(review): presumably true once PreviewPosition has reached the end of Text
+        // (terminals loop on "while (!source.EOF())" while advancing PreviewPosition) - confirm in SourceStream.
+        bool EOF();
+
+        /*
+        //This member is intentionally removed from ISourceStream and made private in SourceStream class. The purpose is to discourage
+        its use or imitation - it produces a new string object which means new garbage for GC.
All Irony-defined Terminal classes
+        are implemented without it, but you can always reproduce the implementation in your custom code if you really need it
+        string GetPreviewText();
+        */
+    }//interface
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/CommentTerminal.cs b/src/Irony/Parsing/Terminals/CommentTerminal.cs
new file mode 100644
index 0000000..015d4aa
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/CommentTerminal.cs
@@ -0,0 +1,134 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    /// <summary>
+    /// Terminal that matches a comment: text that begins with <see cref="StartSymbol"/> and runs up to
+    /// the first occurrence of one of <see cref="EndSymbols"/>.
+    /// </summary>
+    public class CommentTerminal : Terminal
+    {
+        /// <summary>Creates the terminal with its start symbol and one or more end symbols.</summary>
+        /// <param name="name">Terminal name, passed to the base constructor.</param>
+        /// <param name="startSymbol">Text that opens the comment.</param>
+        /// <param name="endSymbols">Texts that close the comment; a symbol containing "\n" makes this a line comment.</param>
+        public CommentTerminal(string name, string startSymbol, params string[] endSymbols) : base(name, TokenCategory.Comment)
+        {
+            StartSymbol = startSymbol;
+            EndSymbols = new StringList();
+            EndSymbols.AddRange(endSymbols);
+            Priority = TerminalPriority.High; //assign max priority
+        }
+
+        public string StartSymbol;
+        public StringList EndSymbols;
+        private char[] _endSymbolsFirsts;
+        private bool _isLineComment; //true if NewLine is one of EndSymbols; if yes, EOF is also considered a valid end symbol
+
+        #region overrides
+
+        /// <summary>
+        /// Builds the first-character lookup for the end symbols, classifies the terminal as line or
+        /// multi-line comment, and installs a default <c>EditorInfo</c> when none was assigned.
+        /// </summary>
+        public override void Init(GrammarData grammarData)
+        {
+            base.Init(grammarData);
+            //_endSymbolsFirsts char array is used for fast search for end symbols using String's method IndexOfAny(...)
+            _endSymbolsFirsts = new char[EndSymbols.Count];
+            for (int i = 0; i < EndSymbols.Count; i++)
+            {
+                string sym = EndSymbols[i];
+                _endSymbolsFirsts[i] = sym[0];
+                _isLineComment |= sym.Contains("\n");
+            }
+            // Decide on IsMultiline only after ALL end symbols were inspected. Testing inside the loop made the
+            // result depend on symbol order: with end symbols ("\r", "\n") the flag was set on the first
+            // iteration, before the "\n" symbol was seen. The Count check keeps the old behaviour for an
+            // empty symbol list (flag never set).
+            if (!_isLineComment && EndSymbols.Count > 0)
+                SetFlag(TermFlags.IsMultiline);
+            if (EditorInfo == null)
+            {
+                TokenType ttype = _isLineComment ? TokenType.LineComment : TokenType.Comment;
+                EditorInfo = new TokenEditorInfo(ttype, TokenColor.Comment, TokenTriggers.None);
+            }
+        }
+
+        /// <summary>
+        /// Matches a comment at the current position.
+        /// </summary>
+        /// <returns>
+        /// <c>null</c> when the start symbol is not present; the comment token when an end symbol is found.
+        /// When no end symbol is found: a token for a line comment, an incomplete token in
+        /// <c>ParseMode.VsLineScan</c>, otherwise an "unclosed comment" error token.
+        /// </returns>
+        public override Token TryMatch(ParsingContext context, ISourceStream source)
+        {
+            Token result;
+            if (context.VsLineScanState.Value != 0)
+            {
+                // we are continuing in line mode - restore internal env (none in this case)
+                context.VsLineScanState.Value = 0;
+            }
+            else
+            {
+                //we are starting from scratch
+                if (!BeginMatch(context, source)) return null;
+            }
+            result = CompleteMatch(context, source);
+            if (result != null) return result;
+            //if it is LineComment, it is ok to hit EOF without final line-break; just return all until end.
+            if (_isLineComment)
+                return source.CreateToken(OutputTerminal);
+            if (context.Mode == ParseMode.VsLineScan)
+                return CreateIncompleteToken(context, source);
+            return context.CreateErrorToken(Resources.ErrUnclosedComment);
+        }
+
+        // Produces a token covering everything up to the end of the text, marks it incomplete and records
+        // this terminal's MultilineIndex in the line-scan state.
+        private Token CreateIncompleteToken(ParsingContext context, ISourceStream source)
+        {
+            source.PreviewPosition = source.Text.Length;
+            Token result = source.CreateToken(OutputTerminal);
+            result.Flags |= TokenFlags.IsIncomplete;
+            context.VsLineScanState.TerminalIndex = MultilineIndex;
+            return result;
+        }
+
+        // Returns true and moves the preview position past StartSymbol when the text starts with it.
+        private bool BeginMatch(ParsingContext context, ISourceStream source)
+        {
+            //Check starting symbol
+            if (!source.MatchSymbol(StartSymbol)) return false;
+            source.PreviewPosition += StartSymbol.Length;
+            return true;
+        }
+
+        // Scans forward for an end symbol. Returns the comment token, or null when none is found
+        // (the preview position is then at, or past, the last candidate).
+        private Token CompleteMatch(ParsingContext context, ISourceStream source)
+        {
+            //Find end symbol
+            while (!source.EOF())
+            {
+                int firstCharPos;
+                if (EndSymbols.Count == 1)
+                    // Ordinal search: the IndexOf(string, int) overload is culture-sensitive, and on ICU-based
+                    // runtimes a linguistic search for "\n" can fail to find it inside "\r\n".
+                    firstCharPos = source.Text.IndexOf(EndSymbols[0], source.PreviewPosition, StringComparison.Ordinal);
+                else
+                    firstCharPos = source.Text.IndexOfAny(_endSymbolsFirsts, source.PreviewPosition);
+                if (firstCharPos < 0)
+                {
+                    source.PreviewPosition = source.Text.Length;
+                    return null; //indicating error
+                }
+                //We found a character that might start an end symbol; let's see if it is true.
+                source.PreviewPosition = firstCharPos;
+                foreach (string endSymbol in EndSymbols)
+                {
+                    if (source.MatchSymbol(endSymbol))
+                    {
+                        //We found end symbol; eat end symbol only if it is not line comment.
+                        // For line comment, leave LF symbol there, it might be important to have a separate LF token
+                        if (!_isLineComment)
+                            source.PreviewPosition += endSymbol.Length;
+                        return source.CreateToken(OutputTerminal);
+                    }//if
+                }//foreach endSymbol
+                source.PreviewPosition++; //move to the next char and try again
+            }//while
+            return null; //might happen if we found a start char of end symbol, but not the full endSymbol
+        }//method
+
+        /// <summary>Returns a one-element list holding <see cref="StartSymbol"/>.</summary>
+        public override IList GetFirsts()
+        {
+            return new string[] { StartSymbol };
+        }
+
+        #endregion
+    }//CommentTerminal class
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/CompoundTerminalBase.cs b/src/Irony/Parsing/Terminals/CompoundTerminalBase.cs
new file mode 100644
index 0000000..ea1a625
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/CompoundTerminalBase.cs
@@ -0,0 +1,305 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    #region About compound terminals
+
+    /*
+     As it turns out, many terminal types in real-world languages have 3-part structure: prefix-body-suffix
+     The body is essentially the terminal "value", while prefix and suffix are used to specify additional
+     information (options), while not being a part of the terminal itself.
+     For example:
+     1. c# numbers, may have 0x prefix for hex representation, and suffixes specifying
+       the exact data type of the literal (f, l, m, etc)
+     2.
c# string may have "@" prefix which disables escaping inside the string
+     3. c# identifiers may have "@" prefix and escape sequences inside - just like strings
+     4. Python string may have "u" and "r" prefixes, "r" working the same way as @ in c# strings
+     5. VB string literals may have "c" suffix identifying that the literal is a character, not a string
+     6. VB number literals and identifiers may have suffixes identifying data type
+
+     So it seems like all these terminals have the format "prefix-body-suffix".
+     The CompoundTerminalBase base class implements base functionality supporting this multi-part structure.
+     The IdentifierTerminal, NumberLiteral and StringLiteral classes inherit from this base class.
+     The methods in TerminalFactory static class demonstrate that with this architecture we can define the whole
+     variety of terminals for c#, Python and VB.NET languages.
+    */
+
+    #endregion
+
+    // Maps the character following the escape char to the character it stands for
+    // (see GetDefaultEscapes: 'n' -> '\n', 't' -> '\t', ...).
+    public class EscapeTable : Dictionary<char, char>
+    { }
+
+    /// <summary>
+    /// Base class for terminals of the form prefix-body-suffix. <see cref="TryMatch"/> drives the scan;
+    /// derived classes supply <see cref="ReadBody"/> and <see cref="ConvertValue"/>.
+    /// </summary>
+    public abstract class CompoundTerminalBase : Terminal
+    {
+        #region Nested classes
+
+        // prefix text -> flags OR-ed into CompoundTokenDetails.Flags when that prefix is read
+        protected class ScanFlagTable : Dictionary<string, short>
+        { }
+
+        // suffix text -> type codes stored in CompoundTokenDetails.TypeCodes when that suffix is read
+        protected class TypeCodeTable : Dictionary<string, TypeCode[]>
+        { }
+
+        // Scratch record filled in while one token is being scanned; attached to the token as token.Details.
+        public class CompoundTokenDetails
+        {
+            public string Prefix;
+            public string Body;
+            public string Suffix;
+            public string Sign;
+            public short Flags; //need to be short, because we need to save it in Scanner state for Vs integration
+            public string Error;
+            public TypeCode[] TypeCodes;
+            public string ExponentSymbol;  //exponent symbol for Number literal
+            public string StartSymbol;     //string start and end symbols
+            public string EndSymbol;
+            public object Value;
+
+            //partial token info, used by VS integration
+            public bool PartialOk;
+
+            public bool IsPartial;
+            public bool PartialContinues;
+            public byte SubTypeIndex; //used for string literal kind
+
+            //Flags helper method
+            public bool IsSet(short flag)
+            {
+                return (Flags & flag) != 0;
+            }
+
+            // Concatenation of the three parts; null parts contribute nothing.
+            public string Text
+            { get { return Prefix + Body + Suffix; } }
+        }
+
+        #endregion
+
+        #region constructors and initialization
+
+        public CompoundTerminalBase(string name) : this(name, TermFlags.None)
+        {
+        }
+
+        public CompoundTerminalBase(string name, TermFlags flags) : base(name)
+        {
+            SetFlag(flags);
+            Escapes = GetDefaultEscapes();
+        }
+
+        // Registers a prefix together with the flags it implies.
+        // Dictionary.Add throws ArgumentException if the same prefix is registered twice.
+        protected void AddPrefixFlag(string prefix, short flags)
+        {
+            PrefixFlags.Add(prefix, flags);
+            Prefixes.Add(prefix);
+        }
+
+        // Registers a suffix together with the type codes it selects.
+        // Dictionary.Add throws ArgumentException if the same suffix is registered twice.
+        public void AddSuffix(string suffix, params TypeCode[] typeCodes)
+        {
+            SuffixTypeCodes.Add(suffix, typeCodes);
+            Suffixes.Add(suffix);
+        }
+
+        #endregion
+
+        #region public Properties/Fields
+
+        public char EscapeChar = '\\';
+        public EscapeTable Escapes = new EscapeTable();
+
+        //Case sensitivity for prefixes and suffixes
+        public bool CaseSensitivePrefixesSuffixes = false;
+
+        #endregion
+
+        #region private fields
+
+        protected readonly ScanFlagTable PrefixFlags = new ScanFlagTable();
+        protected readonly TypeCodeTable SuffixTypeCodes = new TypeCodeTable();
+        protected StringList Prefixes = new StringList();
+        protected StringList Suffixes = new StringList();
+        private CharHashSet _prefixesFirsts; //first chars of all prefixes, for fast prefix detection
+        private CharHashSet _suffixesFirsts; //first chars of all suffixes, for fast suffix detection
+
+        #endregion
+
+        #region overrides: Init, TryMatch
+
+        /// <summary>
+        /// Sorts prefixes and suffixes with <c>StringList.LongerFirst</c> and builds the first-character sets
+        /// used to reject non-matching input quickly.
+        /// </summary>
+        public override void Init(GrammarData grammarData)
+        {
+            base.Init(grammarData);
+            //collect all suffixes, prefixes in lists and create sets of first chars for both
+            Prefixes.Sort(StringList.LongerFirst);
+            Suffixes.Sort(StringList.LongerFirst);
+
+            _prefixesFirsts = new CharHashSet(CaseSensitivePrefixesSuffixes);
+            _suffixesFirsts = new CharHashSet(CaseSensitivePrefixesSuffixes);
+            foreach (string pfx in Prefixes)
+                _prefixesFirsts.Add(pfx[0]);
+
+            foreach (string sfx in Suffixes)
+                _suffixesFirsts.Add(sfx[0]);
+        }//method
+
+        /// <summary>Returns the registered prefixes.</summary>
+        public override IList GetFirsts()
+        {
+            return Prefixes;
+        }
+
+        /// <summary>
+        /// Scans one token: optional quick parse, then prefix, body, suffix and value conversion.
+        /// </summary>
+        /// <returns>
+        /// <c>null</c> when <see cref="ReadBody"/> reports no match; an error token when the details carry an
+        /// error or conversion fails; otherwise the token built by <see cref="CreateToken"/>.
+        /// </returns>
+        public override Token TryMatch(ParsingContext context, ISourceStream source)
+        {
+            Token token;
+            //Try quick parse first, but only if we're not continuing
+            if (context.VsLineScanState.Value == 0)
+            {
+                token = QuickParse(context, source);
+                if (token != null) return token;
+                source.PreviewPosition = source.Position; //revert the position
+            }
+
+            CompoundTokenDetails details = new CompoundTokenDetails();
+            InitDetails(context, details);
+
+            // When continuing a partial token from a previous line there is no prefix to read.
+            if (context.VsLineScanState.Value == 0)
+                ReadPrefix(source, details);
+            if (!ReadBody(source, details))
+                return null;
+            if (details.Error != null)
+                return context.CreateErrorToken(details.Error);
+            if (details.IsPartial)
+            {
+                details.Value = details.Body;
+            }
+            else
+            {
+                ReadSuffix(source, details);
+
+                if (!ConvertValue(details, context))
+                {
+                    if (string.IsNullOrEmpty(details.Error))
+                        details.Error = Resources.ErrInvNumber;
+                    return context.CreateErrorToken(details.Error); // "Failed to convert the value: {0}"
+                }
+            }
+            token = CreateToken(context, source, details);
+
+            if (details.IsPartial)
+            {
+                //Save terminal state so we can continue
+                context.VsLineScanState.TokenSubType = details.SubTypeIndex;
+                context.VsLineScanState.TerminalFlags = details.Flags;
+                context.VsLineScanState.TerminalIndex = MultilineIndex;
+            }
+            else
+                context.VsLineScanState.Value = 0;
+            return token;
+        }
+
+        // Creates the token from details.Value, attaches the details and flags partial tokens as incomplete.
+        protected virtual Token CreateToken(ParsingContext context, ISourceStream source, CompoundTokenDetails details)
+        {
+            var token = source.CreateToken(OutputTerminal, details.Value);
+            token.Details = details;
+            if (details.IsPartial)
+                token.Flags |= TokenFlags.IsIncomplete;
+            return token;
+        }
+
+        // Seeds the partial-token switches from the parsing mode and the saved line-scan state.
+        protected virtual void InitDetails(ParsingContext context, CompoundTokenDetails details)
+        {
+            details.PartialOk = context.Mode == ParseMode.VsLineScan;
+            details.PartialContinues = context.VsLineScanState.Value != 0;
+        }
+
+        // Fast-path hook; the base implementation never matches.
+        protected virtual Token QuickParse(ParsingContext context, ISourceStream source)
+        {
+            return null;
+        }
+
+        // Reads the first registered prefix that matches at the preview position (prefixes are tried in the
+        // order established by Init), advances past it and ORs its flags into details.Flags.
+        protected virtual void ReadPrefix(ISourceStream source, CompoundTokenDetails details)
+        {
+            if (!_prefixesFirsts.Contains(source.PreviewChar))
+                return;
+            var comparisonType = CaseSensitivePrefixesSuffixes ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;
+            foreach (string pfx in Prefixes)
+            {
+                // Prefixes are usually case insensitive, even if language is case-sensitive. So we cannot use source.MatchSymbol here,
+                // we need case-specific comparison
+                if (string.Compare(source.Text, source.PreviewPosition, pfx, 0, pfx.Length, comparisonType) != 0)
+                    continue;
+                //We found prefix
+                details.Prefix = pfx;
+                source.PreviewPosition += pfx.Length;
+                //Set flag from prefix
+                short pfxFlags;
+                if (!string.IsNullOrEmpty(details.Prefix) && PrefixFlags.TryGetValue(details.Prefix, out pfxFlags))
+                    details.Flags |= pfxFlags;
+                return;
+            }//foreach
+        }//method
+
+        // Body-reading hook; the base implementation reports "no match".
+        protected virtual bool ReadBody(ISourceStream source, CompoundTokenDetails details)
+        {
+            return false;
+        }
+
+        // Reads the first registered suffix that matches at the preview position, advances past it and
+        // stores its type codes in details.TypeCodes.
+        protected virtual void ReadSuffix(ISourceStream source, CompoundTokenDetails details)
+        {
+            if (!_suffixesFirsts.Contains(source.PreviewChar)) return;
+            var comparisonType = CaseSensitivePrefixesSuffixes ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;
+            foreach (string sfx in Suffixes)
+            {
+                //Suffixes are usually case insensitive, even if language is case-sensitive. So we cannot use source.MatchSymbol here,
+                // we need case-specific comparison
+                if (string.Compare(source.Text, source.PreviewPosition, sfx, 0, sfx.Length, comparisonType) != 0)
+                    continue;
+                //We found suffix
+                details.Suffix = sfx;
+                source.PreviewPosition += sfx.Length;
+                //Set TypeCode from suffix
+                TypeCode[] codes;
+                if (!string.IsNullOrEmpty(details.Suffix) && SuffixTypeCodes.TryGetValue(details.Suffix, out codes))
+                    details.TypeCodes = codes;
+                return;
+            }//foreach
+        }//method
+
+        // Conversion hook. The base implementation copies Body to Value and returns false, which TryMatch
+        // turns into an error token - derived classes are expected to override it.
+        protected virtual bool ConvertValue(CompoundTokenDetails details, ParsingContext context)
+        {
+            details.Value = details.Body;
+            return false;
+        }
+
+        #endregion
+
+        #region utils: GetDefaultEscapes
+
+        /// <summary>Creates a new table with the default single-character escapes.</summary>
+        public static EscapeTable GetDefaultEscapes()
+        {
+            EscapeTable escapes = new EscapeTable();
+            escapes.Add('a', '\u0007');
+            escapes.Add('b', '\b');
+            escapes.Add('t', '\t');
+            escapes.Add('n', '\n');
+            escapes.Add('v', '\v');
+            escapes.Add('f', '\f');
+            escapes.Add('r', '\r');
+            escapes.Add('"', '"');
+            escapes.Add('\'', '\'');
+            escapes.Add('\\', '\\');
+            escapes.Add(' ', ' ');
+            escapes.Add('\n', '\n'); //this is a special escape of the linebreak itself,
+            // when string ends with "\" char and continues on the next line
+            return escapes;
+        }
+
+        #endregion
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/ConstantTerminal.cs b/src/Irony/Parsing/Terminals/ConstantTerminal.cs
new file mode 100644
index 0000000..a78ac48
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/ConstantTerminal.cs
@@ -0,0 +1,73 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    //This terminal allows to declare a set of constants in the input language
+    // It should be used when constant symbols do not look like normal identifiers; e.g. in Scheme, #t, #f are true/false
+    // constants, and they don't fit into Scheme identifier pattern.
+
+    // lexeme text -> value placed into the produced token
+    public class ConstantsTable : Dictionary<string, object>
+    { }
+
+    /// <summary>
+    /// Terminal that recognizes a fixed set of lexemes registered through <see cref="Add"/> and produces
+    /// tokens carrying the value registered for the matched lexeme.
+    /// </summary>
+    public class ConstantTerminal : Terminal
+    {
+        public readonly ConstantsTable Constants = new ConstantsTable();
+
+        /// <param name="name">Terminal name, passed to the base constructor.</param>
+        /// <param name="nodeType">Optional AST node type; when non-null it is stored in <c>AstConfig.NodeType</c>.</param>
+        public ConstantTerminal(string name, Type nodeType = null) : base(name)
+        {
+            SetFlag(TermFlags.IsConstant);
+            if (nodeType != null)
+                AstConfig.NodeType = nodeType;
+            Priority = TerminalPriority.High; //constants have priority over normal identifiers
+        }
+
+        /// <summary>Registers a lexeme; registering the same lexeme again replaces its value.</summary>
+        public void Add(string lexeme, object value)
+        {
+            Constants[lexeme] = value;
+        }
+
+        /// <summary>Installs a default <c>EditorInfo</c> when none was assigned.</summary>
+        public override void Init(GrammarData grammarData)
+        {
+            base.Init(grammarData);
+            if (EditorInfo == null)
+                EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
+        }
+
+        /// <summary>
+        /// Tries every registered constant at the current position; a constant matches only when it is
+        /// followed by a character accepted by <c>Grammar.IsWhitespaceOrDelimiter</c>.
+        /// </summary>
+        /// <returns>A token carrying the constant's value, or <c>null</c> when no constant matches.</returns>
+        public override Token TryMatch(ParsingContext context, ISourceStream source)
+        {
+            string text = source.Text;
+            foreach (var entry in Constants)
+            {
+                // Each attempt starts from the token start; a previous attempt may have moved the preview position.
+                source.PreviewPosition = source.Position;
+                var constant = entry.Key;
+                if (source.PreviewPosition + constant.Length > text.Length) continue;
+                if (source.MatchSymbol(constant))
+                {
+                    source.PreviewPosition += constant.Length;
+                    if (!Grammar.IsWhitespaceOrDelimiter(source.PreviewChar))
+                        continue; //make sure it is delimiter
+                    return source.CreateToken(OutputTerminal, entry.Value);
+                }
+            }
+            return null;
+        }
+
+        /// <summary>Returns a copy of the registered lexemes.</summary>
+        public override IList GetFirsts()
+        {
+            string[] array = new string[Constants.Count];
+            Constants.Keys.CopyTo(array, 0);
+            return array;
+        }
+    }//class
+}
\ No newline at end of file
diff --git
a/src/Irony/Parsing/Terminals/CustomTerminal.cs b/src/Irony/Parsing/Terminals/CustomTerminal.cs
new file mode 100644
index 0000000..89817f1
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/CustomTerminal.cs
@@ -0,0 +1,51 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    // Signature of the callback a CustomTerminal delegates matching to; it receives the terminal itself,
+    // the parsing context and the source stream, and returns the matched token.
+    public delegate Token MatchHandler(Terminal terminal, ParsingContext context, ISourceStream source);
+
+    // A terminal whose matching logic is supplied as a delegate, so that a custom match can be plugged in
+    // without deriving a new class from Terminal.
+    public class CustomTerminal : Terminal
+    {
+        private readonly MatchHandler _handler;
+
+        public readonly StringList Prefixes = new StringList();
+
+        // Stores the handler, copies the optional prefixes and assigns a default EditorInfo.
+        public CustomTerminal(string name, MatchHandler handler, params string[] prefixes) : base(name)
+        {
+            _handler = handler;
+            if (prefixes != null)
+            {
+                Prefixes.AddRange(prefixes);
+            }
+            EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
+        }
+
+        // The delegate passed to the constructor.
+        public MatchHandler Handler
+        {
+            [System.Diagnostics.DebuggerStepThrough]
+            get { return _handler; }
+        }
+
+        // Forwards the call to the handler and returns whatever it returns.
+        public override Token TryMatch(ParsingContext context, ISourceStream source)
+        {
+            MatchHandler matcher = _handler;
+            return matcher(this, context, source);
+        }
+
+        // The prefixes given at construction.
+        [System.Diagnostics.DebuggerStepThrough]
+        public override IList GetFirsts()
+        {
+            return Prefixes;
+        }
+    }//class
+}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/DataLiteralBase.cs
b/src/Irony/Parsing/Terminals/DataLiteralBase.cs
new file mode 100644
index 0000000..c1cfa0b
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/DataLiteralBase.cs
@@ -0,0 +1,60 @@
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    //DataLiteralBase is a base class for a set of specialized terminals with a primary purpose of building data readers
+    // DsvLiteral is used for reading delimiter-separated values (DSV), comma-separated format is a specific case of DSV
+    // FixedLengthLiteral may be used to read values of fixed length
+    public class DataLiteralBase : Terminal
+    {
+        public TypeCode DataType;
+
+        //For date format strings see MSDN help for "Custom format strings", available through help for DateTime.ParseExact(...) method
+        public string DateTimeFormat = "d"; //standard format, identifies MM/dd/yyyy for invariant culture.
+
+        public int IntRadix = 10; //Radix (base) for numeric numbers
+
+        public DataLiteralBase(string name, TypeCode dataType) : base(name)
+        {
+            DataType = dataType;
+        }
+
+        // Reads the raw text via ReadBody, converts it via ConvertValue and wraps the result in a token.
+        // Returns null when ReadBody returns null (no match). Any exception thrown while reading or
+        // converting is turned into an error token carrying the exception message.
+        public override Token TryMatch(ParsingContext context, ISourceStream source)
+        {
+            try
+            {
+                var textValue = ReadBody(context, source);
+                if (textValue == null) return null;
+                var value = ConvertValue(context, textValue);
+                return source.CreateToken(OutputTerminal, value);
+            }
+            catch (Exception ex)
+            {
+                //we throw exception in DsvLiteral when we cannot find a closing quote for quoted value
+                return context.CreateErrorToken(ex.Message);
+            }
+        }//method
+
+        // Hook for derived classes: returns the raw text of the value, or null for "no match".
+        // The base implementation never matches.
+        protected virtual string ReadBody(ParsingContext context, ISourceStream source)
+        {
+            return null;
+        }
+
+        // Converts the raw text to the CLR type selected by DataType, using context.Culture.
+        // Conversion failures surface as exceptions (turned into error tokens by TryMatch).
+        protected virtual object ConvertValue(ParsingContext context, string textValue)
+        {
+            switch (DataType)
+            {
+                case TypeCode.String: return textValue;
+                case TypeCode.DateTime: return DateTime.ParseExact(textValue, DateTimeFormat, context.Culture);
+                case TypeCode.Single:
+                case TypeCode.Double:
+                    var dValue = Convert.ToDouble(textValue, context.Culture);
+                    if (DataType == TypeCode.Double) return dValue;
+                    return Convert.ChangeType(dValue, DataType, context.Culture);
+
+                case TypeCode.Decimal:
+                    // Decimal used to fall into the integer branch, so fractional text ("12.50") failed and
+                    // values outside the Int64 range overflowed. Text in a non-decimal radix can only be an
+                    // integer, so that case keeps using the integer path.
+                    if (IntRadix != 10) goto default;
+                    return Convert.ToDecimal(textValue, context.Culture);
+
+                default: //integer types
+                    var iValue = IntRadix == 10 ? Convert.ToInt64(textValue, context.Culture) : Convert.ToInt64(textValue, IntRadix);
+                    if (DataType == TypeCode.Int64) return iValue;
+                    return Convert.ChangeType(iValue, DataType, context.Culture);
+            }
+        }//method
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/DsvLiteral.cs b/src/Irony/Parsing/Terminals/DsvLiteral.cs
new file mode 100644
index 0000000..d796e58
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/DsvLiteral.cs
@@ -0,0 +1,112 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    //A terminal for DSV-formatted files (Delimiter-Separated Values), a generalization of CSV (comma-separated values) format.
+    // See http://en.wikipedia.org/wiki/Delimiter-separated_values
+    // For CSV format, there's a recommendation RFC4180 (http://tools.ietf.org/html/rfc4180)
+    // It might seem that this terminal is not that useful and it is easy enough to create a custom CSV reader for a particular
+    // data format. However, if you consider all escaping and double-quote enclosing rules, then a custom reader solution would not seem so trivial.
+    // So DsvLiteral can simplify this task.
+    public class DsvLiteral : DataLiteralBase
+    {
+        public string Terminator = ",";
+        public bool ConsumeTerminator = true; //if true, the source pointer moves after the separator
+        private char[] _terminators;
+
+        //For last value on the line specify terminator = null; the DsvLiteral will then look for NewLine as terminator
+        public DsvLiteral(string name, TypeCode dataType, string terminator) : this(name, dataType)
+        {
+            Terminator = terminator;
+        }
+
+        public DsvLiteral(string name, TypeCode dataType) : base(name, dataType)
+        {
+        }
+
+        // Prepares the characters an unquoted value stops at: CR/LF when Terminator is null,
+        // otherwise the first character of Terminator.
+        public override void Init(GrammarData grammarData)
+        {
+            base.Init(grammarData);
+            if (Terminator == null)
+                _terminators = new char[] { '\n', '\r' };
+            else
+                _terminators = new char[] { Terminator[0] };
+        }
+
+        // Reads one value (quoted or not) and, when ConsumeTerminator is set and a terminator is configured,
+        // moves the preview position past the terminator that follows it.
+        protected override string ReadBody(ParsingContext context, ISourceStream source)
+        {
+            string body;
+            if (source.PreviewChar == '"')
+                body = ReadQuotedBody(context, source);
+            else
+                body = ReadNotQuotedBody(context, source);
+            if (ConsumeTerminator && Terminator != null)
+                MoveSourcePositionAfterTerminator(source);
+            return body;
+        }
+
+        // Reads a double-quoted value; a doubled double-quote inside the value stands for one quote char.
+        // Leaves the preview position just after the closing quote. Throws when there is no closing quote
+        // (the base class turns the exception into an error token).
+        private string ReadQuotedBody(ParsingContext context, ISourceStream source)
+        {
+            const char dQuote = '"';
+            StringBuilder sb = null;
+            var from = source.Location.Position + 1; //skip initial double quote
+            while (true)
+            {
+                var until = source.Text.IndexOf(dQuote, from);
+                if (until < 0)
+                    throw new Exception(Resources.ErrDsvNoClosingQuote); // "Could not find a closing quote for quoted value."
+                source.PreviewPosition = until; //now points at double-quote
+                var piece = source.Text.Substring(from, until - from);
+                source.PreviewPosition++; //move after double quote
+                if (source.PreviewChar != dQuote && sb == null)
+                    return piece; //quick path - if sb (string builder) was not created yet, we are looking at the very first segment;
+                // and if we found a standalone dquote, then we are done - the "piece" is the result.
+                if (sb == null)
+                    sb = new StringBuilder(100);
+                sb.Append(piece);
+                if (source.PreviewChar != dQuote)
+                    return sb.ToString();
+                //we have doubled double-quote; add a single double-quote char to the result and move over both symbols
+                sb.Append(dQuote);
+                from = source.PreviewPosition + 1;
+            }
+        }
+
+        // Reads an unquoted value: everything from the token start up to the terminator (or up to CR/LF when
+        // Terminator is null), or to the end of the text when no terminator follows. Leaves the preview
+        // position at the terminator / end of text.
+        private string ReadNotQuotedBody(ParsingContext context, ISourceStream source)
+        {
+            var startPos = source.Location.Position;
+            var sepPos = source.Text.IndexOfAny(_terminators, startPos);
+            // _terminators holds only the FIRST char of Terminator. For a multi-character terminator a hit is
+            // just a candidate: keep searching until the whole terminator matches, so that a value containing
+            // that char (e.g. "a|b" with terminator "||") is not cut short.
+            while (sepPos >= 0 && Terminator != null && Terminator.Length > 1)
+            {
+                source.PreviewPosition = sepPos;
+                if (source.MatchSymbol(Terminator))
+                    break;
+                sepPos = source.Text.IndexOfAny(_terminators, sepPos + 1);
+            }
+            if (sepPos < 0)
+                sepPos = source.Text.Length;
+            source.PreviewPosition = sepPos;
+            var valueText = source.Text.Substring(startPos, sepPos - startPos);
+            return valueText;
+        }
+
+        // Advances the preview position to just after the next occurrence of Terminator.
+        // If the text ends before a terminator is found, the position is left at the end of the text.
+        private void MoveSourcePositionAfterTerminator(ISourceStream source)
+        {
+            while (!source.EOF())
+            {
+                // Skip to the next char that could start the terminator. The EOF test is required here: without it
+                // the scan ran off the end of the text when no terminator followed (e.g. a quoted last value).
+                while (!source.EOF() && source.PreviewChar != Terminator[0])
+                    source.PreviewPosition++;
+                if (source.EOF())
+                    return;
+                if (source.MatchSymbol(Terminator))
+                {
+                    source.PreviewPosition += Terminator.Length;
+                    return;
+                }//if
+                // First char matched but the full terminator did not: step over it, otherwise the
+                // outer loop would test the same position forever.
+                source.PreviewPosition++;
+            }//while
+        }//method
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/FixedLengthLiteral.cs b/src/Irony/Parsing/Terminals/FixedLengthLiteral.cs
new file mode 100644
index 0000000..32af287
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/FixedLengthLiteral.cs
@@ -0,0 +1,36 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    //A terminal for representing fixed-length lexemes coming up sometimes in programming language
+    // (in Fortran for ex, every line starts with 5-char label, followed by a single continuation char)
+    // It may be also used to create grammar/parser for reading data files with fixed length fields
+    public class FixedLengthLiteral : DataLiteralBase
+    {
+        public int Length;
+
+        public FixedLengthLiteral(string name, int length, TypeCode dataType) : base(name, dataType)
+        {
+            Length = length;
+        }
+
+        // Returns the next Length characters starting at the token start and moves the preview position
+        // just past them. When fewer than Length characters remain, Substring throws
+        // ArgumentOutOfRangeException (the base TryMatch reports it as an error token).
+        protected override string ReadBody(ParsingContext context, ISourceStream source)
+        {
+            var start = source.Location.Position;
+            // Take the substring BEFORE touching the stream: on short input this throws while the preview
+            // position is still valid, instead of leaving it pointing past the end of the text.
+            var body = source.Text.Substring(start, Length);
+            source.PreviewPosition = start + Length;
+            return body;
+        }
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/FreeTextLiteral.cs b/src/Irony/Parsing/Terminals/FreeTextLiteral.cs
new file mode 100644
index 0000000..64cf80b
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/FreeTextLiteral.cs
@@ -0,0 +1,185 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Terminals +{ + // Sometimes language definition includes tokens that have no specific format, but are just "all text until some terminator character(s)"; + // FreeTextTerminal allows easy implementation of such language element. + + [Flags] + public enum FreeTextOptions + { + None = 0x0, + ConsumeTerminator = 0x01, //move source pointer beyond terminator (so token "consumes" it from input), but don't include it in token text + IncludeTerminator = 0x02, // include terminator into token text/value + AllowEof = 0x04, // treat EOF as legitimate terminator + AllowEmpty = 0x08, + } + + public class FreeTextLiteral : Terminal + { + public StringSet Terminators = new StringSet(); + public StringSet Firsts = new StringSet(); + public StringDictionary Escapes = new StringDictionary(); + public FreeTextOptions FreeTextOptions; + private char[] _stopChars; + private bool _isSimple; //True if we have a single Terminator and no escapes + private string _singleTerminator; + + public FreeTextLiteral(string name, params string[] terminators) : this(name, FreeTextOptions.None, terminators) + { + } + + public FreeTextLiteral(string name, FreeTextOptions freeTextOptions, params string[] terminators) : base(name) + { + FreeTextOptions = freeTextOptions; + Terminators.UnionWith(terminators); + SetFlag(TermFlags.IsLiteral); + }//constructor + + public override IList GetFirsts() + { + var result = new StringList(); + result.AddRange(Firsts); + return result; + } + + public override void Init(GrammarData grammarData) + { + base.Init(grammarData); + _isSimple = Terminators.Count == 1 && Escapes.Count == 0; + if (_isSimple) + { + _singleTerminator = Terminators.First(); + return; + } + var stopChars = new CharHashSet(); + foreach (var key in Escapes.Keys) + stopChars.Add(key[0]); + foreach (var t in Terminators) + stopChars.Add(t[0]); + _stopChars = 
stopChars.ToArray();
+        }
+
+        //Scanner entry point: consumes one of the optional Firsts prefixes, then reads the free text with the
+        // simple algorithm (single terminator, no escapes) or with the extended one.
+        public override Token TryMatch(ParsingContext context, ISourceStream source)
+        {
+            if (!TryMatchPrefixes(context, source))
+                return null;
+            return _isSimple ? TryMatchContentSimple(context, source) : TryMatchContentExtended(context, source);
+        }
+
+        //Returns true when no prefixes are defined, or when one of Firsts matches at the current position;
+        // in the latter case PreviewPosition is moved past the matched prefix.
+        private bool TryMatchPrefixes(ParsingContext context, ISourceStream source)
+        {
+            if (Firsts.Count == 0)
+                return true;
+            foreach (var first in Firsts)
+                if (source.MatchSymbol(first))
+                {
+                    source.PreviewPosition += first.Length;
+                    return true;
+                }
+            return false;
+        }
+
+        //Simple algorithm: a single IndexOf search for the only terminator.
+        // Returns an error token when the terminator is missing and AllowEof is not set,
+        // and null when the resulting text is empty and AllowEmpty is not set.
+        private Token TryMatchContentSimple(ParsingContext context, ISourceStream source)
+        {
+            var startPos = source.PreviewPosition;
+            var termLen = _singleTerminator.Length;
+            var stringComp = Grammar.CaseSensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;
+            int termPos = source.Text.IndexOf(_singleTerminator, startPos, stringComp);
+            if (termPos < 0)
+            {
+                if (!IsSet(FreeTextOptions.AllowEof))
+                    return context.CreateErrorToken(Resources.ErrFreeTextNoEndTag, _singleTerminator);
+                //EOF plays the role of the terminator. There is no terminator text to include or consume, so its
+                // length must count as zero - otherwise IncludeTerminator makes Substring run past the end of the
+                // text and ConsumeTerminator moves PreviewPosition beyond EOF.
+                termPos = source.Text.Length;
+                termLen = 0;
+            }
+            var textEnd = termPos;
+            if (IsSet(FreeTextOptions.IncludeTerminator))
+                textEnd += termLen;
+            var tokenText = source.Text.Substring(startPos, textEnd - startPos);
+            if (string.IsNullOrEmpty(tokenText) && (FreeTextOptions & FreeTextOptions.AllowEmpty) == 0)
+                return null;
+            //NOTE(review): unlike CheckTerminators, IncludeTerminator alone does not move the position past the
+            // terminator here - confirm this difference is intended.
+            // The following line is a fix submitted by user rmcase
+            source.PreviewPosition = IsSet(FreeTextOptions.ConsumeTerminator) ? termPos + termLen : termPos;
+            return source.CreateToken(OutputTerminal, tokenText);
+        }
+
+        //Extended algorithm (several terminators and/or escapes): jumps from one stop char to the next, replacing
+        // escapes on the way, until a terminator or (with AllowEof) the end of text is reached.
+        // Returns null when the end is reached without AllowEof, or when the text is empty and AllowEmpty is not set.
+        private Token TryMatchContentExtended(ParsingContext context, ISourceStream source)
+        {
+            StringBuilder tokenText = new StringBuilder();
+            while (true)
+            {
+                //Find next position of one of stop chars
+                var nextPos = source.Text.IndexOfAny(_stopChars, source.PreviewPosition);
+                if (nextPos == -1)
+                {
+                    if (!IsSet(FreeTextOptions.AllowEof))
+                        return null;
+                    //EOF is a legitimate terminator: keep the text collected so far (escapes already replaced),
+                    // add the remaining tail and finish through the common exit below, so the token value and the
+                    // AllowEmpty check are the same as for a terminator-ended token.
+                    tokenText.Append(source.Text, source.PreviewPosition, source.Text.Length - source.PreviewPosition);
+                    source.PreviewPosition = source.Text.Length;
+                    break;
+                }
+                var newText = source.Text.Substring(source.PreviewPosition, nextPos - source.PreviewPosition);
+                tokenText.Append(newText);
+                source.PreviewPosition = nextPos;
+                //if it is escape, add escaped text and continue search
+                if (CheckEscape(source, tokenText))
+                    continue;
+                //check terminators
+                if (CheckTerminators(source, tokenText))
+                    break; //from while (true); we reached
+                //The current stop is not at escape or terminator; add this char to token text and move on
+                tokenText.Append(source.PreviewChar);
+                source.PreviewPosition++;
+            }//while
+            var text = tokenText.ToString();
+            if (string.IsNullOrEmpty(text) && (FreeTextOptions & FreeTextOptions.AllowEmpty) == 0)
+                return null;
+            return source.CreateToken(OutputTerminal, text);
+        }
+
+        //If one of the Escapes keys matches at the current position, skips it, appends its replacement value
+        // to tokenText and returns true; otherwise returns false and changes nothing.
+        private bool CheckEscape(ISourceStream source, StringBuilder tokenText)
+        {
+            foreach (var dictEntry in Escapes)
+            {
+                if (source.MatchSymbol(dictEntry.Key))
+                {
+                    source.PreviewPosition += dictEntry.Key.Length;
+                    tokenText.Append(dictEntry.Value);
+                    return true;
+                }
+            }//foreach
+            return false;
+        }
+
+        //Returns true if one of Terminators matches at the current position. The terminator is appended to
+        // tokenText when IncludeTerminator is set, and skipped in the source when either ConsumeTerminator
+        // or IncludeTerminator is set.
+        private bool CheckTerminators(ISourceStream source, StringBuilder tokenText)
+        {
+            foreach (var term in Terminators)
+                if (source.MatchSymbol(term))
+                {
+                    if (IsSet(FreeTextOptions.IncludeTerminator))
+                        tokenText.Append(term);
+                    if (IsSet(FreeTextOptions.ConsumeTerminator | FreeTextOptions.IncludeTerminator))
+                        source.PreviewPosition += term.Length;
+                    return true;
+                }
+            return false;
+        
} + + private bool IsSet(FreeTextOptions option) + { + return (FreeTextOptions & option) != 0; + } + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Terminals/IdentifierTerminal.cs b/src/Irony/Parsing/Terminals/IdentifierTerminal.cs new file mode 100644 index 0000000..102e207 --- /dev/null +++ b/src/Irony/Parsing/Terminals/IdentifierTerminal.cs @@ -0,0 +1,303 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +using System.Globalization; + +namespace Sanchime.Irony.Parsing.Terminals +{ + #region notes + + //Identifier terminal. Matches alpha-numeric sequences that usually represent identifiers and keywords. 
+ // c#: @ prefix signals to not interpret as a keyword; allows \u escapes + // + + #endregion + + [Flags] + public enum IdOptions : short + { + None = 0, + AllowsEscapes = 0x01, + CanStartWithEscape = 0x03, + + IsNotKeyword = 0x10, + NameIncludesPrefix = 0x20, + } + + public enum CaseRestriction + { + None, + FirstUpper, + FirstLower, + AllUpper, + AllLower + } + + public class UnicodeCategoryList : List + { } + + public class IdentifierTerminal : CompoundTerminalBase + { + //Id flags for internal use + internal enum IdFlagsInternal : short + { + HasEscapes = 0x100, + } + + #region constructors and initialization + + public IdentifierTerminal(string name) : this(name, IdOptions.None) + { + } + + public IdentifierTerminal(string name, IdOptions options) : this(name, "_", "_") + { + Options = options; + } + + public IdentifierTerminal(string name, string extraChars, string extraFirstChars = "") : base(name) + { + AllFirstChars = Strings.AllLatinLetters + extraFirstChars; + AllChars = Strings.AllLatinLetters + Strings.DecimalDigits + extraChars; + } + + public void AddPrefix(string prefix, IdOptions options) + { + AddPrefixFlag(prefix, (short)options); + } + + #endregion + + #region properties: AllChars, AllFirstChars + + private CharHashSet _allCharsSet; + private CharHashSet _allFirstCharsSet; + + public string AllFirstChars; + public string AllChars; + public TokenEditorInfo KeywordEditorInfo = new TokenEditorInfo(TokenType.Keyword, TokenColor.Keyword, TokenTriggers.None); + public IdOptions Options; //flags for the case when there are no prefixes + public CaseRestriction CaseRestriction; + + public readonly UnicodeCategoryList StartCharCategories = new UnicodeCategoryList(); //categories of first char + public readonly UnicodeCategoryList CharCategories = new UnicodeCategoryList(); //categories of all other chars + public readonly UnicodeCategoryList CharsToRemoveCategories = new UnicodeCategoryList(); //categories of chars to remove from final id, usually 
formatting category + + #endregion + + #region overrides + + public override void Init(GrammarData grammarData) + { + base.Init(grammarData); + _allCharsSet = new CharHashSet(Grammar.CaseSensitive); + _allCharsSet.UnionWith(AllChars.ToCharArray()); + + //Adjust case restriction. We adjust only first chars; if first char is ok, we will scan the rest without restriction + // and then check casing for entire identifier + switch (CaseRestriction) + { + case CaseRestriction.AllLower: + case CaseRestriction.FirstLower: + _allFirstCharsSet = new CharHashSet(true); + _allFirstCharsSet.UnionWith(AllFirstChars.ToLowerInvariant().ToCharArray()); + break; + + case CaseRestriction.AllUpper: + case CaseRestriction.FirstUpper: + _allFirstCharsSet = new CharHashSet(true); + _allFirstCharsSet.UnionWith(AllFirstChars.ToUpperInvariant().ToCharArray()); + break; + + default: //None + _allFirstCharsSet = new CharHashSet(Grammar.CaseSensitive); + _allFirstCharsSet.UnionWith(AllFirstChars.ToCharArray()); + break; + } + //if there are "first" chars defined by categories, add the terminal to FallbackTerminals + if (StartCharCategories.Count > 0) + grammarData.NoPrefixTerminals.Add(this); + if (EditorInfo == null) + EditorInfo = new TokenEditorInfo(TokenType.Identifier, TokenColor.Identifier, TokenTriggers.None); + } + + public override IList GetFirsts() + { + // new scanner: identifier has no prefixes + return null; + /* + var list = new StringList(); + list.AddRange(Prefixes); + foreach (char ch in _allFirstCharsSet) + list.Add(ch.ToString()); + if ((Options & IdOptions.CanStartWithEscape) != 0) + list.Add(this.EscapeChar.ToString()); + return list; + */ + } + + protected override void InitDetails(ParsingContext context, CompoundTokenDetails details) + { + base.InitDetails(context, details); + details.Flags = (short)Options; + } + + //Override to assign IsKeyword flag to keyword tokens + protected override Token CreateToken(ParsingContext context, ISourceStream source, 
CompoundTokenDetails details) + { + Token token = base.CreateToken(context, source, details); + if (details.IsSet((short)IdOptions.IsNotKeyword)) + return token; + //check if it is keyword + CheckReservedWord(token); + return token; + } + + private void CheckReservedWord(Token token) + { + KeyTerm keyTerm; + if (Grammar.KeyTerms.TryGetValue(token.Text, out keyTerm)) + { + token.KeyTerm = keyTerm; + //if it is reserved word, then overwrite terminal + if (keyTerm.Flags.IsSet(TermFlags.IsReservedWord)) + token.SetTerminal(keyTerm); + } + } + + protected override Token QuickParse(ParsingContext context, ISourceStream source) + { + if (!_allFirstCharsSet.Contains(source.PreviewChar)) + return null; + source.PreviewPosition++; + while (_allCharsSet.Contains(source.PreviewChar) && !source.EOF()) + source.PreviewPosition++; + //if it is not a terminator then cancel; we need to go through full algorithm + if (!Grammar.IsWhitespaceOrDelimiter(source.PreviewChar)) + return null; + var token = source.CreateToken(OutputTerminal); + if (CaseRestriction != CaseRestriction.None && !CheckCaseRestriction(token.ValueString)) + return null; + //!!! Do not convert to common case (all-lower) for case-insensitive grammar. 
Let identifiers remain as is,
+            // it is responsibility of interpreter to provide case-insensitive read/write operations for identifiers
+            // if (!this.GrammarData.Grammar.CaseSensitive)
+            //    token.Value = token.Text.ToLower(context.Culture);
+            CheckReservedWord(token);
+            return token;
+        }
+
+        //Full (non-quick) reading of the identifier body. Collects chars until EOF, whitespace/delimiter or a char
+        // rejected by CharOk; \u and \U escapes are decoded when the AllowsEscapes flag is set in details.
+        // Chars whose category is listed in CharsToRemoveCategories are accepted but left out of the result.
+        // Returns false when no chars were collected, an escape is invalid (details.Error is set then),
+        // or the case restriction is violated.
+        protected override bool ReadBody(ISourceStream source, CompoundTokenDetails details)
+        {
+            int start = source.PreviewPosition;
+            bool allowEscapes = details.IsSet((short)IdOptions.AllowsEscapes);
+            CharList outputChars = new CharList();
+            while (!source.EOF())
+            {
+                char current = source.PreviewChar;
+                if (Grammar.IsWhitespaceOrDelimiter(current))
+                    break;
+                if (allowEscapes && current == EscapeChar)
+                {
+                    current = ReadUnicodeEscape(source, details);
+                    //We need to back off the position. ReadUnicodeEscape sets the position to symbol right after escape digits.
+                    //This is the char that we should process in next iteration, so we must backup one char, to pretend the escaped
+                    // char is at position of last digit of escape sequence.
+                    source.PreviewPosition--;
+                    if (details.Error != null)
+                        return false;
+                }
+                //Check if current character is OK
+                if (!CharOk(current, source.PreviewPosition == start))
+                    break;
+                //Check if we need to skip this char
+
+                UnicodeCategory currCat = CharUnicodeInfo.GetUnicodeCategory(current); //I know, it suxx, we do it twice, fix it later
+                if (!CharsToRemoveCategories.Contains(currCat))
+                    outputChars.Add(current); //add it to output (identifier)
+                source.PreviewPosition++;
+            }//while
+            if (outputChars.Count == 0)
+                return false;
+            //Convert collected chars to string
+            details.Body = new string(outputChars.ToArray());
+            if (!CheckCaseRestriction(details.Body))
+                return false;
+            return !string.IsNullOrEmpty(details.Body);
+        }
+
+        //Returns true if ch may appear in an identifier: at the first position when 'first' is true,
+        // at any other position otherwise.
+        private bool CharOk(char ch, bool first)
+        {
+            //first check char lists, then categories
+            var charSet = first ? _allFirstCharsSet : _allCharsSet;
+            if (charSet.Contains(ch)) return true;
+            //check categories; pick the list for this position BEFORE testing whether it is empty, so that
+            // StartCharCategories works even when CharCategories has no entries
+            UnicodeCategoryList catList = first ? StartCharCategories : CharCategories;
+            if (catList.Count == 0) return false;
+            UnicodeCategory chCat = CharUnicodeInfo.GetUnicodeCategory(ch);
+            return catList.Contains(chCat);
+        }
+
+        //Checks the identifier text against CaseRestriction. The body must not be empty (callers check that).
+        // Case conversion is culture-invariant, the same way first chars are prepared in Init.
+        private bool CheckCaseRestriction(string body)
+        {
+            switch (CaseRestriction)
+            {
+                case CaseRestriction.FirstLower: return char.IsLower(body, 0);
+                case CaseRestriction.FirstUpper: return char.IsUpper(body, 0);
+                case CaseRestriction.AllLower: return string.Equals(body.ToLowerInvariant(), body, StringComparison.Ordinal);
+                case CaseRestriction.AllUpper: return string.Equals(body.ToUpperInvariant(), body, StringComparison.Ordinal);
+                default: return true;
+            }
+        }//method
+
+        //Decodes a \uXXXX (4 hex digits) or \UXXXXXXXX (8 hex digits) escape and returns the decoded char.
+        // On success PreviewPosition is set right after the last digit and the HasEscapes flag is added to details.
+        // On failure details.Error is set and '\0' is returned; PreviewPosition is then left on the char after "\".
+        private char ReadUnicodeEscape(ISourceStream source, CompoundTokenDetails details)
+        {
+            //Position is currently at "\" symbol
+            source.PreviewPosition++; //move to U/u char
+            int len;
+            switch (source.PreviewChar)
+            {
+                case 'u': len = 4; break;
+                case 'U': len = 8; break;
+                default:
+                    details.Error = Resources.ErrInvEscSymbol; // "Invalid escape symbol, expected 'u' or 'U' only."
+                    return '\0';
+            }
+            //Digits start right after the u/U char, so ALL of them must fit into the text: digitsStart + len <= Length
+            int digitsStart = source.PreviewPosition + 1;
+            if (digitsStart + len > source.Text.Length)
+            {
+                details.Error = Resources.ErrInvEscSeq; // "Invalid escape sequence";
+                return '\0';
+            }
+            string digits = source.Text.Substring(digitsStart, len);
+            uint code;
+            //Report non-hex digits as an invalid sequence instead of letting a FormatException escape; a code above
+            // 0xFFFF cannot be returned as a single char, so it is rejected too rather than silently truncated.
+            bool parsed = uint.TryParse(digits, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out code);
+            if (!parsed || code > char.MaxValue)
+            {
+                details.Error = Resources.ErrInvEscSeq; // "Invalid escape sequence";
+                return '\0';
+            }
+            source.PreviewPosition = digitsStart + len; //position right after the last digit
+            details.Flags |= (int)IdFlagsInternal.HasEscapes;
+            return (char)code;
+        }
+
+        //Sets the token value to the identifier body, preceded by the prefix when NameIncludesPrefix is set.
+        protected override bool ConvertValue(CompoundTokenDetails details, ParsingContext context)
+        {
+            if (details.IsSet((short)IdOptions.NameIncludesPrefix))
+                details.Value = details.Prefix + details.Body;
+            else
+                details.Value = details.Body;
+            return true;
+        }
+
+        #endregion
+    }//class
+} //namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/ImpliedSymbolTerminal.cs b/src/Irony/Parsing/Terminals/ImpliedSymbolTerminal.cs
new file mode 100644
index 0000000..3e7ae36
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/ImpliedSymbolTerminal.cs
@@ -0,0 +1,36 @@
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    //In some grammars there is a situation when some operator symbol can be skipped in source text and should be implied by parser.
+    // In arithmetics, we often imply "*" operator in formulas:
+    //   x y => x * y.
+    // The SearchGrammar in Samples provides another example: two consecutive terms imply "and" operator and should be treated as such:
+    //   x y => x AND y
+    // We could use a simple nullable Non-terminal terminal in this case, but the problem is that we cannot associate precedence
+    // and associativity with non-terminal, only with terminals. Precedence is important here because the implied symbol identifies binary
+    // operation, so parser should be able to use precedence value(s) when resolving shift/reduce ambiguity.
+    // So here comes ImpliedSymbolTerminal - it is a terminal that produces a token with empty text.
+    // It relies on scanner-parser link enabled - so the implied symbol token is created ONLY
+    // when the current parser state allows it and there are no other alternatives (hence lowest priority value).
+    // See SearchGrammar as an example of use of this terminal.
+    public class ImpliedSymbolTerminal : Terminal
+    {
+        //Low priority: this terminal should be tried only after all other candidate terminals failed.
+        public ImpliedSymbolTerminal(string name) : base(name)
+        {
+            Priority = TerminalPriority.Low;
+        }
+
+        //Runs the base initialization, then adds a grammar error when the grammar has the
+        // DisableScannerParserLink flag set - this terminal can be used only with the link enabled.
+        public override void Init(GrammarData grammarData)
+        {
+            base.Init(grammarData);
+            bool linkDisabled = Grammar.LanguageFlags.IsSet(LanguageFlags.DisableScannerParserLink);
+            if (!linkDisabled)
+                return;
+            //"ImpliedSymbolTerminal cannot be used in grammar with DisableScannerParserLink flag set"
+            grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrImpliedOpUseParserLink, Name);
+        }
+
+        //Always matches: creates a token for the implied symbol without moving the preview position.
+        public override Token TryMatch(ParsingContext context, ISourceStream source)
+        {
+            Token impliedToken = source.CreateToken(this);
+            return impliedToken;
+        }
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/KeyTerm.cs b/src/Irony/Parsing/Terminals/KeyTerm.cs
new file mode 100644
index 0000000..5f9cc97
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/KeyTerm.cs
@@ -0,0 +1,131 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.Terminals +{ + public class KeyTermTable : Dictionary + { + public KeyTermTable(StringComparer comparer) : base(100, comparer) + { + } + } + + public class KeyTermList : List + { } + + //Keyterm is a keyword or a special symbol used in grammar rules, for example: begin, end, while, =, *, etc. + // So "key" comes from the Keyword. + public class KeyTerm : Terminal + { + public KeyTerm(string text, string name) : base(name) + { + Text = text; + ErrorAlias = name; + Flags |= TermFlags.NoAstNode; + } + + public string Text { get; private set; } + + //Normally false, meaning keywords (symbols in grammar consisting of letters) cannot be followed by a letter or digit + public bool AllowAlphaAfterKeyword = false; + + #region overrides: TryMatch, Init, GetPrefixes(), ToString() + + public override void Init(GrammarData grammarData) + { + base.Init(grammarData); + + #region comments about keyterms priority + + // Priority - determines the order in which multiple terminals try to match input for a given current char in the input. + // For a given input char the scanner looks up the collection of terminals that may match this input symbol. It is the order + // in this collection that is determined by Priority value - the higher the priority, the earlier the terminal gets a chance + // to check the input. + // Keywords found in grammar by default have lowest priority to allow other terminals (like identifiers)to check the input first. + // Additionally, longer symbols have higher priority, so symbols like "+=" should have higher priority value than "+" symbol. + // As a result, Scanner would first try to match "+=", longer symbol, and if it fails, it will try "+". 
+ // Reserved words are the opposite - they have the highest priority + + #endregion + + if (Flags.IsSet(TermFlags.IsReservedWord)) + Priority = TerminalPriority.ReservedWords + Text.Length; //the longer the word, the higher is the priority + else + Priority = TerminalPriority.Low + Text.Length; + //Setup editor info + if (EditorInfo != null) return; + TokenType tknType = TokenType.Identifier; + if (Flags.IsSet(TermFlags.IsOperator)) + tknType |= TokenType.Operator; + else if (Flags.IsSet(TermFlags.IsDelimiter | TermFlags.IsPunctuation)) + tknType |= TokenType.Delimiter; + TokenTriggers triggers = TokenTriggers.None; + if (Flags.IsSet(TermFlags.IsBrace)) + triggers |= TokenTriggers.MatchBraces; + if (Flags.IsSet(TermFlags.IsMemberSelect)) + triggers |= TokenTriggers.MemberSelect; + TokenColor color = TokenColor.Text; + if (Flags.IsSet(TermFlags.IsKeyword)) + color = TokenColor.Keyword; + EditorInfo = new TokenEditorInfo(tknType, color, triggers); + } + + public override Token TryMatch(ParsingContext context, ISourceStream source) + { + if (!source.MatchSymbol(Text)) + return null; + source.PreviewPosition += Text.Length; + //In case of keywords, check that it is not followed by letter or digit + if (Flags.IsSet(TermFlags.IsKeyword) && !AllowAlphaAfterKeyword) + { + var previewChar = source.PreviewChar; + if (char.IsLetterOrDigit(previewChar) || previewChar == '_') return null; //reject + } + var token = source.CreateToken(OutputTerminal, Text); + return token; + } + + public override IList GetFirsts() + { + return new string[] { Text }; + } + + public override string ToString() + { + if (Name != Text) return Name; + return Text; + } + + public override string TokenToString(Token token) + { + var keyw = Flags.IsSet(TermFlags.IsKeyword) ? Resources.LabelKeyword : Resources.LabelKeySymbol; //"(Keyword)" : "(Key symbol)" + var result = (token.ValueString ?? 
token.Text) + " " + keyw; + return result; + } + + #endregion + + [System.Diagnostics.DebuggerStepThrough] + public override bool Equals(object obj) + { + return base.Equals(obj); + } + + [System.Diagnostics.DebuggerStepThrough] + public override int GetHashCode() + { + return Text.GetHashCode(); + } + }//class +} \ No newline at end of file diff --git a/src/Irony/Parsing/Terminals/LineContinuationTerminal.cs b/src/Irony/Parsing/Terminals/LineContinuationTerminal.cs new file mode 100644 index 0000000..1b41669 --- /dev/null +++ b/src/Irony/Parsing/Terminals/LineContinuationTerminal.cs @@ -0,0 +1,122 @@ +namespace Sanchime.Irony.Parsing.Terminals +{ + public class LineContinuationTerminal : Terminal + { + public LineContinuationTerminal(string name, params string[] startSymbols) : base(name, TokenCategory.Outline) + { + var symbols = startSymbols.Where(s => !IsNullOrWhiteSpace(s)).ToArray(); + StartSymbols = new StringList(symbols); + if (StartSymbols.Count == 0) + StartSymbols.AddRange(_defaultStartSymbols); + Priority = TerminalPriority.High; + } + + public StringList StartSymbols; + private string _startSymbolsFirsts = string.Concat(_defaultStartSymbols); + private static string[] _defaultStartSymbols = new[] { "\\", "_" }; + public string LineTerminators = "\n\r\v"; + + #region overrides + + public override void Init(GrammarData grammarData) + { + base.Init(grammarData); + + // initialize string of start characters for fast lookup + _startSymbolsFirsts = new string(StartSymbols.Select(s => s.First()).ToArray()); + + if (EditorInfo == null) + { + EditorInfo = new TokenEditorInfo(TokenType.Delimiter, TokenColor.Comment, TokenTriggers.None); + } + } + + public override Token TryMatch(ParsingContext context, ISourceStream source) + { + // Quick check + var lookAhead = source.PreviewChar; + var startIndex = _startSymbolsFirsts.IndexOf(lookAhead); + if (startIndex < 0) + return null; + + // Match start symbols + if (!BeginMatch(source, startIndex, lookAhead)) + return 
null; + + // Match NewLine + var result = CompleteMatch(source); + if (result != null) + return result; + + // Report an error + return context.CreateErrorToken(Resources.ErrNewLineExpected); + } + + private bool BeginMatch(ISourceStream source, int startFrom, char lookAhead) + { + foreach (var startSymbol in StartSymbols.Skip(startFrom)) + { + if (startSymbol[0] != lookAhead) + continue; + if (source.MatchSymbol(startSymbol)) + { + source.PreviewPosition += startSymbol.Length; + return true; + } + } + return false; + } + + private Token CompleteMatch(ISourceStream source) + { + if (source.EOF()) + return null; + + do + { + // Match NewLine + var lookAhead = source.PreviewChar; + if (LineTerminators.IndexOf(lookAhead) >= 0) + { + source.PreviewPosition++; + // Treat \r\n as single NewLine + if (!source.EOF() && lookAhead == '\r' && source.PreviewChar == '\n') + source.PreviewPosition++; + break; + } + + // Eat up whitespace + if (Grammar.IsWhitespaceOrDelimiter(lookAhead)) + { + source.PreviewPosition++; + continue; + } + + // Fail on anything else + return null; + } + while (!source.EOF()); + + // Create output token + return source.CreateToken(OutputTerminal); + } + + public override IList GetFirsts() + { + return StartSymbols; + } + + #endregion overrides + + private static bool IsNullOrWhiteSpace(string s) + { +#if VS2008 + if (String.IsNullOrEmpty(s)) + return true; + return s.Trim().Length == 0; +#else + return string.IsNullOrWhiteSpace(s); +#endif + } + } // LineContinuationTerminal class +} \ No newline at end of file diff --git a/src/Irony/Parsing/Terminals/NewLineTerminal.cs b/src/Irony/Parsing/Terminals/NewLineTerminal.cs new file mode 100644 index 0000000..8513958 --- /dev/null +++ b/src/Irony/Parsing/Terminals/NewLineTerminal.cs @@ -0,0 +1,60 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT 
License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Parsing.Terminals
+{
+    //A simple terminal that recognizes line terminators. Intended for grammars of line-based languages like VB,
+    // as a lighter alternative to using CodeOutlineFilter.
+    public class NewLineTerminal : Terminal
+    {
+        public NewLineTerminal(string name) : base(name, TokenCategory.Outline)
+        {
+            Flags |= TermFlags.IsPunctuation;
+            ErrorAlias = Resources.LabelLineBreak; // "[line break]";
+        }
+
+        //Every char of this string is treated as a line terminator
+        public string LineTerminators = "\n\r\v";
+
+        #region overrides: Init, GetFirsts, TryMatch
+
+        //Besides the base initialization, switches on Grammar.UsesNewLine
+        public override void Init(GrammarData grammarData)
+        {
+            base.Init(grammarData);
+            //That will prevent SkipWhitespace method from skipping new-line chars
+            Grammar.UsesNewLine = true;
+        }
+
+        //Each line terminator char, as a one-char string, may start this terminal
+        public override IList GetFirsts()
+        {
+            StringList firsts = new StringList();
+            for (int i = 0; i < LineTerminators.Length; i++)
+            {
+                firsts.Add(LineTerminators[i].ToString());
+            }
+            return firsts;
+        }
+
+        //Matches one line terminator at the current position; returns null if there is none.
+        public override Token TryMatch(ParsingContext context, ISourceStream source)
+        {
+            char first = source.PreviewChar;
+            if (LineTerminators.IndexOf(first) < 0)
+                return null;
+            //Treat \r\n as a single terminator: shift over both chars in that case
+            int shift = (first == '\r' && source.NextPreviewChar == '\n') ? 2 : 1;
+            source.PreviewPosition += shift;
+            return source.CreateToken(OutputTerminal);
+        }
+
+        #endregion
+    }//class
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/NumberLiteral.cs b/src/Irony/Parsing/Terminals/NumberLiteral.cs
new file mode 100644
index 0000000..6ffe427
--- /dev/null
+++ 
b/src/Irony/Parsing/Terminals/NumberLiteral.cs @@ -0,0 +1,603 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +//Authors: Roman Ivantsov - initial implementation and some later edits +// Philipp Serr - implementation of advanced features for c#, python, VB + +using System.Globalization; + +namespace Sanchime.Irony.Parsing.Terminals +{ + using Sanchime.Irony.Ast; + using Sanchime.Irony.Parsing.Parsers; + using Sanchime.Irony.Utilities; + using BigInteger = System.Numerics.BigInteger; //Microsoft.Scripting.Math.BigInteger; + using Complex64 = System.Numerics.Complex; + + [Flags] + public enum NumberOptions + { + None = 0, + Default = None, + + AllowStartEndDot = 0x01, //python : http://docs.python.org/ref/floating.html + IntOnly = 0x02, + NoDotAfterInt = 0x04, //for use with IntOnly flag; essentially tells terminal to avoid matching integer if + + // it is followed by dot (or exp symbol) - leave to another terminal that will handle float numbers + AllowSign = 0x08, + + DisableQuickParse = 0x10, + AllowLetterAfter = 0x20, // allow number be followed by a letter or underscore; by default this flag is not set, so "3a" would not be + + // recognized as number followed by an identifier + AllowUnderscore = 0x40, // Ruby allows underscore inside number: 1_234 + + //The following should be used with base-identifying prefixes + Binary = 0x0100, //e.g. 
GNU GCC C Extension supports binary number literals + + Octal = 0x0200, + Hex = 0x0400, + } + + public class NumberLiteral : CompoundTerminalBase + { + //Flags for internal use + public enum NumberFlagsInternal : short + { + HasDot = 0x1000, + HasExp = 0x2000, + } + + //nested helper class + public class ExponentsTable : Dictionary + { } + + #region Public Consts + + //currently using TypeCodes for identifying numeric types + public const TypeCode TypeCodeBigInt = (TypeCode)30; + + public const TypeCode TypeCodeImaginary = (TypeCode)31; + + #endregion + + #region constructors and initialization + + public NumberLiteral(string name) : this(name, NumberOptions.Default) + { + } + + public NumberLiteral(string name, NumberOptions options, Type astNodeType) : this(name, options) + { + AstConfig.NodeType = astNodeType; + } + + public NumberLiteral(string name, NumberOptions options, AstNodeCreator astNodeCreator) : this(name, options) + { + AstConfig.NodeCreator = astNodeCreator; + } + + public NumberLiteral(string name, NumberOptions options) : base(name) + { + Options = options; + base.SetFlag(TermFlags.IsLiteral); + } + + public void AddPrefix(string prefix, NumberOptions options) + { + PrefixFlags.Add(prefix, (short)options); + Prefixes.Add(prefix); + } + + public void AddExponentSymbols(string symbols, TypeCode floatType) + { + foreach (var exp in symbols) + _exponentsTable[exp] = floatType; + } + + #endregion + + #region Public fields/properties: ExponentSymbols, Suffixes + + public NumberOptions Options; + public char DecimalSeparator = '.'; + + //Default types are assigned to literals without suffixes; first matching type used + public TypeCode[] DefaultIntTypes = new TypeCode[] { TypeCode.Int32 }; + + public TypeCode DefaultFloatType = TypeCode.Double; + private ExponentsTable _exponentsTable = new ExponentsTable(); + + public bool IsSet(NumberOptions option) + { + return (Options & option) != 0; + } + + #endregion + + #region overrides + + public override void 
Init(GrammarData grammarData)
+        {
+            base.Init(grammarData);
+            //Default Exponent symbols if table is empty
+            if (_exponentsTable.Count == 0 && !IsSet(NumberOptions.IntOnly))
+            {
+                _exponentsTable['e'] = DefaultFloatType;
+                _exponentsTable['E'] = DefaultFloatType;
+            }
+            if (EditorInfo == null)
+                EditorInfo = new TokenEditorInfo(TokenType.Literal, TokenColor.Number, TokenTriggers.None);
+        }
+
+        //Lists the strings a number may start with: all prefixes, the ten decimal digits, the decimal separator
+        // (with AllowStartEndDot) and the sign chars (with AllowSign).
+        public override IList GetFirsts()
+        {
+            StringList result = new StringList();
+            result.AddRange(Prefixes);
+            //we assume that prefix is always optional, so number can always start with plain digit
+            result.AddRange(new string[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" });
+            // Python float numbers can start with a dot
+            if (IsSet(NumberOptions.AllowStartEndDot))
+                result.Add(DecimalSeparator.ToString());
+            if (IsSet(NumberOptions.AllowSign))
+                result.AddRange(new string[] { "-", "+" });
+            return result;
+        }
+
+        //Most numbers in source programs are just one-digit instances of 0, 1, 2, and maybe others until 9
+        // so we try to do a quick parse for these, without starting the whole general process.
+        // Returns null (leaving the position untouched) whenever the quick path does not apply.
+        protected override Token QuickParse(ParsingContext context, ISourceStream source)
+        {
+            if (IsSet(NumberOptions.DisableQuickParse)) return null;
+            //No default int type to produce - leave it to the general algorithm instead of failing on DefaultIntTypes[0]
+            if (DefaultIntTypes == null || DefaultIntTypes.Length == 0) return null;
+            char current = source.PreviewChar;
+            //it must be a digit followed by a whitespace or delimiter.
+            // Only ASCII '0'..'9' qualify: char.IsDigit also accepts decimal digits of other scripts,
+            // for which (current - '0') below would produce a meaningless value.
+            if (current < '0' || current > '9') return null;
+            if (!Grammar.IsWhitespaceOrDelimiter(source.NextPreviewChar))
+                return null;
+            int iValue = current - '0';
+            object value = null;
+            switch (DefaultIntTypes[0])
+            {
+                case TypeCode.Int32: value = iValue; break;
+                case TypeCode.UInt32: value = (uint)iValue; break;
+                case TypeCode.Byte: value = (byte)iValue; break;
+                case TypeCode.SByte: value = (sbyte)iValue; break;
+                case TypeCode.Int16: value = (short)iValue; break;
+                case TypeCode.UInt16: value = (ushort)iValue; break;
+                default: return null; //any other default type goes through the general algorithm
+            }
+            source.PreviewPosition++;
+            return source.CreateToken(OutputTerminal, value);
+        }
+
+        
/// <summary>
/// Reads an optional number prefix, unless the input is a "0" directly followed by a
/// decimal point. In that case "0" is the integer part of a fraction such as "0.123"
/// and must not be mistaken for a prefix (for example an octal "0" prefix).
/// </summary>
/// <param name="source">Source stream positioned at the start of the literal.</param>
/// <param name="details">Token details that receive the prefix and its flags.</param>
protected override void ReadPrefix(ISourceStream source, CompoundTokenDetails details)
{
    if (source.PreviewChar == '0')
    {
        char next = source.NextPreviewChar;
        //Honour the configured DecimalSeparator as well as the literal '.', which is kept
        // so that grammars relying on the previous behavior are unaffected.
        if (next == '.' || next == DecimalSeparator) return;
    }
    base.ReadPrefix(source, details);
}//method
Check if it is a dot in float number + bool isDot = current == DecimalSeparator; + if (allowFloat && isDot) + { + //If we had seen already a dot or exponent, don't accept this one; + bool hasDotOrExp = details.IsSet((short)(NumberFlagsInternal.HasDot | NumberFlagsInternal.HasExp)); + if (hasDotOrExp) break; //from while loop + //In python number literals (NumberAllowPointFloat) a point can be the first and last character, + //We accept dot only if it is followed by a digit + if (digits.IndexOf(source.NextPreviewChar) < 0 && !IsSet(NumberOptions.AllowStartEndDot)) + break; //from while loop + details.Flags |= (int)NumberFlagsInternal.HasDot; + source.PreviewPosition++; + continue; + } + //3. Check if it is int number followed by dot or exp symbol + bool isExpSymbol = details.ExponentSymbol == null && _exponentsTable.ContainsKey(current); + if (!allowFloat && foundDigits && (isDot || isExpSymbol)) + { + //If no partial float allowed then return false - it is not integer, let float terminal recognize it as float + if (IsSet(NumberOptions.NoDotAfterInt)) return false; + //otherwise break, it is integer and we're done reading digits + break; + } + + //4. Only for decimals - check if it is (the first) exponent symbol + if (allowFloat && isDecimal && isExpSymbol) + { + char next = source.NextPreviewChar; + bool nextIsSign = next == '-' || next == '+'; + bool nextIsDigit = digits.IndexOf(next) >= 0; + if (!nextIsSign && !nextIsDigit) + break; //Exponent should be followed by either sign or digit + //ok, we've got real exponent + details.ExponentSymbol = current.ToString(); //remember the exp char + details.Flags |= (int)NumberFlagsInternal.HasExp; + source.PreviewPosition++; + if (nextIsSign) + source.PreviewPosition++; //skip +/- explicitly so we don't have to deal with them on the next iteration + continue; + } + //4. 
/// <summary>
/// Replaces the current token with an error token when the number is directly followed
/// by a letter or underscore and <c>NumberOptions.AllowLetterAfter</c> is not set,
/// then hands over to the base validation.
/// </summary>
/// <param name="context">Parsing context holding the source and the current token.</param>
protected internal override void OnValidateToken(ParsingContext context)
{
    bool letterMayFollow = IsSet(NumberOptions.AllowLetterAfter);
    if (!letterMayFollow)
    {
        var following = context.Source.PreviewChar;
        bool looksLikeIdentifier = char.IsLetter(following) || following == '_';
        if (looksLikeIdentifier)
            context.CurrentToken = context.CreateErrorToken(Resources.ErrNoLetterAfterNum); // "Number cannot be followed by a letter."
    }
    base.OnValidateToken(context);
}
/// <summary>
/// Fast conversion of the literal body to <see cref="int"/>. On success the value is
/// stored in <c>details.Value</c>; on failure nothing is stored and the caller falls back
/// to the general conversion cycle.
/// </summary>
/// <param name="details">Token details; <c>Body</c> holds the digits (and optional sign).</param>
/// <param name="context">Parsing context supplying the culture for decimal parsing.</param>
/// <returns>True when the body was converted to an Int32; otherwise false.</returns>
private bool QuickConvertToInt32(CompoundTokenDetails details, ParsingContext context)
{
    int radix = GetRadix(details);
    if (radix == 10)
    {
        //10 digits is maximum for int32; int32.MaxValue = 2 147 483 647
        if (details.Body.Length > 10) return false;
        //NumberStyles.Integer is what Convert.ToInt32(string, IFormatProvider) uses internally;
        // TryParse reports overflow/format problems without throwing.
        if (!int.TryParse(details.Body, NumberStyles.Integer, context.Culture, out int parsed))
            return false;
        details.Value = parsed;
        return true;
    }
    //Non-decimal radix: Convert has no Try-variant, so catch exactly what it documents.
    // (Base 10 is handled separately above as a workaround for .Net FX bug:
    // http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448)
    try
    {
        details.Value = Convert.ToInt32(details.Body, radix);
        return true;
    }
    catch (Exception ex) when (ex is FormatException || ex is OverflowException || ex is ArgumentException)
    {
        return false;
    }
}//method
/// <summary>
/// Converts the literal body to a floating-point value of the requested type and stores
/// it in <c>details.Value</c>.
/// </summary>
/// <remarks>
/// The body is first normalised to the invariant notation ('.' as decimal point, 'E' as
/// exponent marker) and is therefore parsed with <see cref="CultureInfo.InvariantCulture"/>.
/// Parsing the normalised text with a culture whose decimal separator is not '.' would
/// reject every literal with a fractional part.
/// </remarks>
/// <param name="typeCode">Target type: Single, Double, Decimal or TypeCodeImaginary.</param>
/// <param name="details">Token details; <c>Body</c>, flags and <c>ExponentSymbol</c> are read.</param>
/// <param name="context">Parsing context; kept for signature compatibility, not used.</param>
/// <returns>True on success; false when the body is not a valid number of that type.</returns>
private bool ConvertToFloat(TypeCode typeCode, CompoundTokenDetails details, ParsingContext context)
{
    //only decimal numbers can be fractions
    if (details.IsSet((short)(NumberOptions.Binary | NumberOptions.Octal | NumberOptions.Hex)))
    {
        details.Error = Resources.ErrInvNumber; // "Invalid number."
        return false;
    }
    string body = details.Body;
    //Some languages allow exp symbols other than E. Check if it is the case, and change it to E
    // - otherwise .NET conversion methods may fail
    if (details.IsSet((short)NumberFlagsInternal.HasExp)
        && !string.Equals(details.ExponentSymbol, "E", StringComparison.OrdinalIgnoreCase))
    {
        body = body.Replace(details.ExponentSymbol, "E");
    }

    //'.' decimal separator required by invariant culture
    if (details.IsSet((short)NumberFlagsInternal.HasDot) && DecimalSeparator != '.')
        body = body.Replace(DecimalSeparator, '.');

    CultureInfo invariant = CultureInfo.InvariantCulture;
    switch (typeCode)
    {
        case TypeCode.Double:
        case TypeCodeImaginary:
            if (!double.TryParse(body, NumberStyles.Float, invariant, out double dValue)) return false;
            if (typeCode == TypeCodeImaginary)
                details.Value = new Complex64(0, dValue);
            else
                details.Value = dValue;
            return true;

        case TypeCode.Single:
            if (!float.TryParse(body, NumberStyles.Float, invariant, out float fValue)) return false;
            details.Value = fValue;
            return true;

        case TypeCode.Decimal:
            if (!decimal.TryParse(body, NumberStyles.Float, invariant, out decimal decValue)) return false;
            details.Value = decValue;
            return true;
    }//switch
    return false;
}
/// <summary>
/// Converts the literal body to Int64 (or UInt64 when <paramref name="useULong"/> is set)
/// using the radix implied by the token flags, and stores the result in <c>details.Value</c>.
/// </summary>
/// <param name="details">Token details; <c>Body</c> is read, <c>Value</c>/<c>Error</c> are written.</param>
/// <param name="useULong">True to convert to UInt64 instead of Int64.</param>
/// <param name="context">Parsing context supplying the culture for decimal conversion.</param>
/// <returns>
/// True when the value was stored. False when the body does not fit the target type or is
/// not acceptable to the converter; <c>details.Error</c> then describes the problem.
/// </returns>
private bool TryConvertToLong(CompoundTokenDetails details, bool useULong, ParsingContext context)
{
    try
    {
        int radix = GetRadix(details);
        //workaround for .Net FX bug: http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
        if (radix == 10)
        {
            if (useULong)
                details.Value = Convert.ToUInt64(details.Body, context.Culture);
            else
                details.Value = Convert.ToInt64(details.Body, context.Culture);
        }
        else
        {
            if (useULong)
                details.Value = Convert.ToUInt64(details.Body, radix);
            else
                details.Value = Convert.ToInt64(details.Body, radix);
        }
        return true;
    }
    catch (OverflowException)
    {
        //Report the text that failed to convert (details.Value is still unset here) and the
        // type that was actually requested.
        TypeCode target = useULong ? TypeCode.UInt64 : TypeCode.Int64;
        details.Error = string.Format(Resources.ErrCannotConvertValueToType, details.Body, target.ToString());
        return false;
    }
    catch (Exception ex) when (ex is FormatException || ex is ArgumentException)
    {
        //e.g. a sign in front of a non-decimal number; report it instead of crashing the scanner
        details.Error = Resources.ErrInvNumber; // "Invalid number."
        return false;
    }
}
/// <summary>
/// Returns the numeric base selected by the token flags: 16 for Hex, 8 for Octal,
/// 2 for Binary, and 10 when none of them is set. Flags are tested in that order.
/// </summary>
/// <param name="details">Token details whose flags are inspected.</param>
private int GetRadix(CompoundTokenDetails details) =>
    details.IsSet((short)NumberOptions.Hex) ? 16
    : details.IsSet((short)NumberOptions.Octal) ? 8
    : details.IsSet((short)NumberOptions.Binary) ? 2
    : 10;
//Terminal for reading values enclosed in a pair of start/end characters. For ex, date literal #15/10/2009# in VB
public class QuotedValueLiteral : DataLiteralBase
{
    //Symbol that opens the literal.
    public string StartSymbol;

    //Symbol that closes the literal.
    public string EndSymbol;

    /// <summary>Creates a literal whose opening and closing symbols are the same.</summary>
    public QuotedValueLiteral(string name, string startEndSymbol, TypeCode dataType) : this(name, startEndSymbol, startEndSymbol, dataType)
    {
    }

    /// <summary>Creates a literal with distinct opening and closing symbols.</summary>
    public QuotedValueLiteral(string name, string startSymbol, string endSymbol, TypeCode dataType) : base(name, dataType)
    {
        StartSymbol = startSymbol;
        EndSymbol = endSymbol;
    }

    /// <summary>The literal can only begin with <see cref="StartSymbol"/>.</summary>
    public override IList<string> GetFirsts()
    {
        return new string[] { StartSymbol };
    }

    /// <summary>
    /// Reads the text between <see cref="StartSymbol"/> and the next <see cref="EndSymbol"/>
    /// and moves the preview position just past the end symbol.
    /// </summary>
    /// <returns>The enclosed text, or null when the start or end symbol is not found.</returns>
    protected override string ReadBody(ParsingContext context, ISourceStream source)
    {
        if (!source.MatchSymbol(StartSymbol)) return null; //this will result in null returned from TryMatch, no token
        var start = source.Location.Position + StartSymbol.Length;
        //Ordinal search: delimiters are exact character sequences. The culture-sensitive
        // default can miss or mis-position matches (ignorable characters, "\n" inside "\r\n").
        var end = source.Text.IndexOf(EndSymbol, start, StringComparison.Ordinal);
        if (end < 0) return null;
        var body = source.Text.Substring(start, end - start);
        source.PreviewPosition = end + EndSymbol.Length; //move beyond the end of EndSymbol
        return body;
    }
}//class
/// <summary>
/// Tries to match the terminal's regular expression exactly at the current preview
/// position. On success the preview position is advanced by the match length.
/// </summary>
/// <returns>A token for the matched text, or null when there is no match at this position.</returns>
public override Token TryMatch(ParsingContext context, ISourceStream source)
{
    var position = source.PreviewPosition;
    Match match = _expression.Match(source.Text, position);
    bool matchedHere = match.Success && match.Index == position;
    if (!matchedHere)
        return null;
    source.PreviewPosition = position + match.Length;
    return source.CreateToken(OutputTerminal);
}
// Terminal for a regular expression literal such as /abc?/i.
// token.Value is either the pattern string or a Regex instance (see RegexTermOptions.CreateRegExObject);
// the switches that followed the literal are returned in token.Details.
public class RegexLiteral : Terminal
{
    //Maps a switch character (e.g. 'i') to the RegexOptions it turns on.
    public class RegexSwitchTable : Dictionary<char, RegexOptions>
    { }

    public char StartSymbol = '/';
    public char EndSymbol = '/';
    public char EscapeSymbol = '\\';
    public RegexSwitchTable Switches = new RegexSwitchTable();

    //Options applied to every literal, in addition to those selected by its switches.
    public RegexOptions DefaultOptions = RegexOptions.None;

    public RegexTermOptions Options = RegexTermOptions.Default;

    //Characters that end the search for the closing symbol; built in Init.
    private char[] _stopChars;

    /// <summary>Creates a literal delimited by '/' with the default i/g/m switches.</summary>
    public RegexLiteral(string name) : base(name)
    {
        Switches.Add('i', RegexOptions.IgnoreCase);
        Switches.Add('g', RegexOptions.None); //not sure what to do with this flag? anybody, any advice?
        Switches.Add('m', RegexOptions.Multiline);
        SetFlag(TermFlags.IsLiteral);
    }

    /// <summary>
    /// Creates a literal with custom delimiter and escape characters. Chains to the
    /// single-argument constructor so that the default switches and the IsLiteral flag
    /// are set up the same way for both constructors.
    /// </summary>
    public RegexLiteral(string name, char startEndSymbol, char escapeSymbol) : this(name)
    {
        StartSymbol = startEndSymbol;
        EndSymbol = startEndSymbol;
        EscapeSymbol = escapeSymbol;
    }//constructor

    public override void Init(GrammarData grammarData)
    {
        base.Init(grammarData);
        //a literal must end on the same line, so line breaks stop the search too
        _stopChars = new char[] { EndSymbol, '\r', '\n' };
    }

    public override IList<string> GetFirsts()
    {
        var result = new StringList();
        result.Add(StartSymbol.ToString());
        return result;
    }

    /// <summary>
    /// Scans a regex literal: finds the unescaped end symbol on the current line, reads the
    /// trailing switches, and creates the token.
    /// </summary>
    /// <returns>
    /// The literal token, or an error token when the end symbol is missing, a switch is
    /// duplicated (with UniqueSwitches), or an unknown letter follows (without AllowLetterAfter).
    /// </returns>
    public override Token TryMatch(ParsingContext context, ISourceStream source)
    {
        while (true)
        {
            //Find next position
            var newPos = source.Text.IndexOfAny(_stopChars, source.PreviewPosition + 1);
            //we either didn't find it
            if (newPos == -1)
                return context.CreateErrorToken(Resources.ErrNoEndForRegex);// "No end symbol for regex literal."
            source.PreviewPosition = newPos;
            if (source.PreviewChar != EndSymbol)
                //we hit CR or LF, this is an error
                return context.CreateErrorToken(Resources.ErrNoEndForRegex);
            if (!CheckEscaped(source))
                break;
        }
        source.PreviewPosition++; //move after end symbol
        //save pattern length, we will need it
        var patternLen = source.PreviewPosition - source.Location.Position - 2; //exclude start and end symbol
        //read switches and turn them into options, starting from the configured defaults
        RegexOptions options = DefaultOptions;
        var switches = string.Empty;
        while (ReadSwitch(source, ref options))
        {
            if (IsSet(RegexTermOptions.UniqueSwitches) && switches.Contains(source.PreviewChar))
                return context.CreateErrorToken(Resources.ErrDupRegexSwitch, source.PreviewChar); // "Duplicate switch '{0}' for regular expression"
            switches += source.PreviewChar.ToString();
            source.PreviewPosition++;
        }
        //check following symbol
        if (!IsSet(RegexTermOptions.AllowLetterAfter))
        {
            var currChar = source.PreviewChar;
            if (char.IsLetter(currChar) || currChar == '_')
                return context.CreateErrorToken(Resources.ErrInvRegexSwitch, currChar); // "Invalid switch '{0}' for regular expression"
        }
        var token = source.CreateToken(OutputTerminal);
        //we have token, now what's left is to set its Value field. It is either pattern itself, or Regex instance
        string pattern = token.Text.Substring(1, patternLen); //exclude start and end symbol
        object value = pattern;
        if (IsSet(RegexTermOptions.CreateRegExObject))
        {
            value = new Regex(pattern, options);
        }
        token.Value = value;
        token.Details = switches; //save switches in token.Details
        return token;
    }

    //Returns true when the character at the preview position is preceded by an odd number
    // of escape symbols, i.e. it is escaped. The preview position is restored before returning.
    private bool CheckEscaped(ISourceStream source)
    {
        var savePos = source.PreviewPosition;
        bool escaped = false;
        source.PreviewPosition--;
        while (source.PreviewChar == EscapeSymbol)
        {
            escaped = !escaped;
            source.PreviewPosition--;
        }
        source.PreviewPosition = savePos;
        return escaped;
    }

    //If the preview character is a registered switch, merges its option into 'options'
    // and returns true. Does not advance the source.
    private bool ReadSwitch(ISourceStream source, ref RegexOptions options)
    {
        RegexOptions option;
        var result = Switches.TryGetValue(source.PreviewChar, out option);
        if (result)
            options |= option;
        return result;
    }

    public bool IsSet(RegexTermOptions option)
    {
        return (Options & option) != 0;
    }
}//class
//Describes one start/end symbol pair registered for the string literal, together with
// its option flags and the index it received when it was added to the list.
private class StringSubType
{
    internal readonly string Start, End;
    internal readonly StringOptions Flags;
    internal readonly byte Index;

    internal StringSubType(string start, string end, StringOptions flags, byte index)
    {
        Start = start;
        End = end;
        Flags = flags;
        Index = index;
    }

    /// <summary>
    /// Comparison that orders sub-types by descending length of the start symbol, so that
    /// a longer symbol is tried before a shorter one.
    /// </summary>
    /// <remarks>
    /// Returns negative, zero or positive consistently in both directions, as a sort
    /// comparison must. A null entry or a null start symbol counts as length 0 and sorts last.
    /// </remarks>
    internal static int LongerStartFirst(StringSubType x, StringSubType y)
    {
        int xLength = x?.Start?.Length ?? 0;
        int yLength = y?.Start?.Length ?? 0;
        return yLength.CompareTo(xLength);
    }
}
base(name) + { + SetFlag(TermFlags.IsLiteral); + } + + public StringLiteral(string name, string startEndSymbol, StringOptions options) : this(name) + { + _subtypes.Add(startEndSymbol, startEndSymbol, options); + } + + public StringLiteral(string name, string startEndSymbol) : this(name, startEndSymbol, StringOptions.None) + { + } + + public StringLiteral(string name, string startEndSymbol, StringOptions options, Type astNodeType) + : this(name, startEndSymbol, options) + { + AstConfig.NodeType = astNodeType; + } + + public StringLiteral(string name, string startEndSymbol, StringOptions options, AstNodeCreator astNodeCreator) + : this(name, startEndSymbol, options) + { + AstConfig.NodeCreator = astNodeCreator; + } + + public void AddStartEnd(string startEndSymbol, StringOptions stringOptions) + { + AddStartEnd(startEndSymbol, startEndSymbol, stringOptions); + } + + public void AddStartEnd(string startSymbol, string endSymbol, StringOptions stringOptions) + { + _subtypes.Add(startSymbol, endSymbol, stringOptions); + } + + public void AddPrefix(string prefix, StringOptions flags) + { + AddPrefixFlag(prefix, (short)flags); + } + + #endregion + + #region Properties/Fields + + private readonly StringSubTypeList _subtypes = new StringSubTypeList(); + private string _startSymbolsFirsts; //first chars of start-end symbols + + #endregion + + #region overrides: Init, GetFirsts, ReadBody, etc... + + public override void Init(GrammarData grammarData) + { + base.Init(grammarData); + _startSymbolsFirsts = string.Empty; + if (_subtypes.Count == 0) + { + grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrInvStrDef, Name); //"Error in string literal [{0}]: No start/end symbols specified." 
+ return; + } + //collect all start-end symbols in lists and create strings of first chars + var allStartSymbols = new StringSet(); //to detect duplicate start symbols + _subtypes.Sort(StringSubType.LongerStartFirst); + bool isTemplate = false; + foreach (StringSubType subType in _subtypes) + { + if (allStartSymbols.Contains(subType.Start)) + grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null, + Resources.ErrDupStartSymbolStr, subType.Start, Name); //"Duplicate start symbol {0} in string literal [{1}]." + allStartSymbols.Add(subType.Start); + _startSymbolsFirsts += subType.Start[0].ToString(); + if ((subType.Flags & StringOptions.IsTemplate) != 0) isTemplate = true; + } + if (!CaseSensitivePrefixesSuffixes) + _startSymbolsFirsts = _startSymbolsFirsts.ToLower() + _startSymbolsFirsts.ToUpper(); + //Set multiline flag + foreach (StringSubType info in _subtypes) + { + if ((info.Flags & StringOptions.AllowsLineBreak) != 0) + { + SetFlag(TermFlags.IsMultiline); + break; + } + } + //For templates only + if (isTemplate) + { + //Check that template settings object is provided + var templateSettings = AstConfig.Data as StringTemplateSettings; + if (templateSettings == null) + grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrTemplNoSettings, Name); //"Error in string literal [{0}]: IsTemplate flag is set, but TemplateSettings is not provided." 
/// <summary>
/// Reads the string body. For a fresh token the start symbol must be consumed first;
/// when a partial token continues (<c>details.PartialContinues</c>) that step is skipped.
/// </summary>
/// <returns>False when a start symbol was required but not found; otherwise the result of CompleteReadBody.</returns>
protected override bool ReadBody(ISourceStream source, CompoundTokenDetails details)
{
    bool startSymbolRequired = !details.PartialContinues;
    if (startSymbolRequired && !ReadStartSymbol(source, details))
        return false;
    return CompleteReadBody(source, details);
}
-1 : source.Text.IndexOf('\n', source.PreviewPosition); + //fix by ashmind for EOF right after opening symbol + while (true) + { + int endPos = source.Text.IndexOf(endQuoteSymbol, source.PreviewPosition); + //Check for partial token in line-scanning mode + if (endPos < 0 && details.PartialOk && lineBreakAllowed) + { + ProcessPartialBody(source, details); + return true; + } + //Check for malformed string: either EndSymbol not found, or LineBreak is found before EndSymbol + bool malformed = endPos < 0 || nlPos >= 0 && nlPos < endPos; + if (malformed) + { + //Set source position for recovery: move to the next line if linebreak is not allowed. + if (nlPos > 0) endPos = nlPos; + if (endPos > 0) source.PreviewPosition = endPos + 1; + details.Error = Resources.ErrBadStrLiteral;// "Mal-formed string literal - cannot find termination symbol."; + return true; //we did find start symbol, so it is definitely string, only malformed + }//if malformed + + if (source.EOF()) + return true; + + //We found EndSymbol - check if it is escaped; if yes, skip it and continue search + if (escapeEnabled && IsEndQuoteEscaped(source.Text, endPos)) + { + source.PreviewPosition = endPos + endQuoteSymbol.Length; + continue; //searching for end symbol + } + + //Check if it is doubled end symbol + source.PreviewPosition = endPos; + if (details.IsSet((short)StringOptions.AllowsDoubledQuote) && source.MatchSymbol(endQuoteDoubled)) + { + source.PreviewPosition = endPos + endQuoteDoubled.Length; + continue; + }//checking for doubled end symbol + + //Ok, this is normal endSymbol that terminates the string. + // Advance source position and get out from the loop + details.Body = source.Text.Substring(start, endPos - start); + source.PreviewPosition = endPos + endQuoteSymbol.Length; + return true; //if we come here it means we're done - we found string end. 
+ } //end of loop to find string end; + } + + private void ProcessPartialBody(ISourceStream source, CompoundTokenDetails details) + { + int from = source.PreviewPosition; + source.PreviewPosition = source.Text.Length; + details.Body = source.Text.Substring(from, source.PreviewPosition - from); + details.IsPartial = true; + } + + protected override void InitDetails(ParsingContext context, CompoundTokenDetails details) + { + base.InitDetails(context, details); + if (context.VsLineScanState.Value != 0) + { + //we are continuing partial string on the next line + details.Flags = context.VsLineScanState.TerminalFlags; + details.SubTypeIndex = context.VsLineScanState.TokenSubType; + var stringInfo = _subtypes[context.VsLineScanState.TokenSubType]; + details.StartSymbol = stringInfo.Start; + details.EndSymbol = stringInfo.End; + } + } + + protected override void ReadSuffix(ISourceStream source, CompoundTokenDetails details) + { + base.ReadSuffix(source, details); + //"char" type can be identified by suffix (like VB where c suffix identifies char) + // in this case we have details.TypeCodes[0] == char and we need to set the IsChar flag + if (details.TypeCodes != null && details.TypeCodes[0] == TypeCode.Char) + details.Flags |= (int)StringOptions.IsChar; + else + //we may have IsChar flag set (from startEndSymbol, like in c# single quote identifies char) + // in this case set type code + if (details.IsSet((short)StringOptions.IsChar)) + details.TypeCodes = new TypeCode[] { TypeCode.Char }; + } + + private bool IsEndQuoteEscaped(string text, int quotePosition) + { + bool escaped = false; + int p = quotePosition - 1; + while (p > 0 && text[p] == EscapeChar) + { + escaped = !escaped; + p--; + } + return escaped; + } + + private bool ReadStartSymbol(ISourceStream source, CompoundTokenDetails details) + { + if (_startSymbolsFirsts.IndexOf(source.PreviewChar) < 0) + return false; + foreach (StringSubType subType in _subtypes) + { + if (!source.MatchSymbol(subType.Start)) + 
continue; + //We found start symbol + details.StartSymbol = subType.Start; + details.EndSymbol = subType.End; + details.Flags |= (short)subType.Flags; + details.SubTypeIndex = subType.Index; + source.PreviewPosition += subType.Start.Length; + return true; + }//foreach + return false; + }//method + + //Extract the string content from lexeme, adjusts the escaped and double-end symbols + protected override bool ConvertValue(CompoundTokenDetails details, ParsingContext context) + { + string value = details.Body; + bool escapeEnabled = !details.IsSet((short)StringOptions.NoEscapes); + //Fix all escapes + if (escapeEnabled && value.IndexOf(EscapeChar) >= 0) + { + details.Flags |= (int)StringFlagsInternal.HasEscapes; + string[] arr = value.Split(EscapeChar); + bool ignoreNext = false; + //we skip the 0 element as it is not preceeded by "\" + for (int i = 1; i < arr.Length; i++) + { + if (ignoreNext) + { + ignoreNext = false; + continue; + } + string s = arr[i]; + if (string.IsNullOrEmpty(s)) + { + //it is "\\" - escaped escape symbol. 
+ arr[i] = @"\"; + ignoreNext = true; + continue; + } + //The char is being escaped is the first one; replace it with char in Escapes table + char first = s[0]; + char newFirst; + if (Escapes.TryGetValue(first, out newFirst)) + arr[i] = newFirst + s.Substring(1); + else + { + arr[i] = HandleSpecialEscape(arr[i], details); + }//else + }//for i + value = string.Join(string.Empty, arr); + }// if EscapeEnabled + + //Check for doubled end symbol + string endSymbol = details.EndSymbol; + if (details.IsSet((short)StringOptions.AllowsDoubledQuote) && value.IndexOf(endSymbol) >= 0) + value = value.Replace(endSymbol + endSymbol, endSymbol); + + if (details.IsSet((short)StringOptions.IsChar)) + { + if (value.Length != 1) + { + details.Error = Resources.ErrBadChar; //"Invalid length of char literal - should be a single character."; + return false; + } + details.Value = value[0]; + } + else + { + details.TypeCodes = new TypeCode[] { TypeCode.String }; + details.Value = value; + } + return true; + } + + //Should support: \Udddddddd, \udddd, \xdddd, \N{name}, \0, \ddd (octal), + protected virtual string HandleSpecialEscape(string segment, CompoundTokenDetails details) + { + if (string.IsNullOrEmpty(segment)) return string.Empty; + int len, p; string digits; char ch; string result; + char first = segment[0]; + switch (first) + { + case 'u': + case 'U': + if (details.IsSet((short)StringOptions.AllowsUEscapes)) + { + len = first == 'u' ? 4 : 8; + if (segment.Length < len + 1) + { + details.Error = string.Format(Resources.ErrBadUnEscape, segment.Substring(len + 1), len);// "Invalid unicode escape ({0}), expected {1} hex digits." 
+ return segment; + } + digits = segment.Substring(1, len); + ch = (char)Convert.ToUInt32(digits, 16); + result = ch + segment.Substring(len + 1); + return result; + }//if + break; + + case 'x': + if (details.IsSet((short)StringOptions.AllowsXEscapes)) + { + //x-escape allows variable number of digits, from one to 4; let's count them + p = 1; //current position + while (p < 5 && p < segment.Length) + { + if (Strings.HexDigits.IndexOf(segment[p]) < 0) break; + p++; + } + //p now point to char right after the last digit + if (p <= 1) + { + details.Error = Resources.ErrBadXEscape; // @"Invalid \x escape, at least one digit expected."; + return segment; + } + digits = segment.Substring(1, p - 1); + ch = (char)Convert.ToUInt32(digits, 16); + result = ch + segment.Substring(p); + return result; + }//if + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + if (details.IsSet((short)StringOptions.AllowsOctalEscapes)) + { + //octal escape allows variable number of digits, from one to 3; let's count them + p = 0; //current position + while (p < 3 && p < segment.Length) + { + if (Strings.OctalDigits.IndexOf(segment[p]) < 0) break; + p++; + } + //p now point to char right after the last digit + digits = segment.Substring(0, p); + ch = (char)Convert.ToUInt32(digits, 8); + result = ch + segment.Substring(p); + return result; + }//if + break; + }//switch + details.Error = string.Format(Resources.ErrInvEscape, segment); //"Invalid escape sequence: \{0}" + return segment; + }//method + + #endregion + }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/Terminals/TerminalFactory.cs b/src/Irony/Parsing/Terminals/TerminalFactory.cs new file mode 100644 index 0000000..f0aa26c --- /dev/null +++ b/src/Irony/Parsing/Terminals/TerminalFactory.cs @@ -0,0 +1,189 @@ +#region License + +/* ********************************************************************************** + * This source code is subject to terms 
and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution.
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/

#endregion

//Authors: Roman Ivantsov, Philipp Serr

using System.Globalization;

namespace Sanchime.Irony.Parsing.Terminals
{
    public static class TerminalFactory
    {
        //Double-quoted string with all escapes; the "@" prefix switches to no escapes,
        // line breaks allowed and doubled quotes allowed.
        public static StringLiteral CreateCSharpString(string name)
        {
            var literal = new StringLiteral(name, "\"", StringOptions.AllowsAllEscapes);
            var verbatimOptions = StringOptions.NoEscapes | StringOptions.AllowsLineBreak | StringOptions.AllowsDoubledQuote;
            literal.AddPrefix("@", verbatimOptions);
            return literal;
        }

        //Single-quoted literal flagged as char.
        public static StringLiteral CreateCSharpChar(string name)
        {
            return new StringLiteral(name, "'", StringOptions.IsChar);
        }

        //Double-quoted string without escapes, doubled quotes allowed; "$" and "c" suffixes
        // select String and Char type codes.
        public static StringLiteral CreateVbString(string name)
        {
            var literal = new StringLiteral(name);
            literal.AddStartEnd("\"", StringOptions.NoEscapes | StringOptions.AllowsDoubledQuote);
            literal.AddSuffix("$", TypeCode.String);
            literal.AddSuffix("c", TypeCode.Char);
            return literal;
        }

        //Single- and triple-quoted strings (both quote kinds); triple-quoted forms allow line breaks.
        // Prefixes: "u" keeps escapes, "r" and "ur" disable them.
        public static StringLiteral CreatePythonString(string name)
        {
            var singleLine = StringOptions.AllowsAllEscapes;
            var multiLine = StringOptions.AllowsAllEscapes | StringOptions.AllowsLineBreak;

            var literal = new StringLiteral(name);
            literal.AddStartEnd("'", singleLine);
            literal.AddStartEnd("'''", multiLine);
            literal.AddStartEnd("\"", singleLine);
            literal.AddStartEnd("\"\"\"", multiLine);

            literal.AddPrefix("u", StringOptions.AllowsAllEscapes);
            literal.AddPrefix("r", StringOptions.NoEscapes);
            literal.AddPrefix("ur", StringOptions.NoEscapes);

            return literal;
        }

        //http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf section 9.4.4
        public static NumberLiteral CreateCSharpNumber(string name)
        {
            var number = new NumberLiteral(name)
            {
                DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.UInt32, TypeCode.Int64, TypeCode.UInt64 },
                DefaultFloatType = TypeCode.Double
            };
            number.AddPrefix("0x", NumberOptions.Hex);
            number.AddSuffix("u", TypeCode.UInt32, TypeCode.UInt64);
            number.AddSuffix("l", TypeCode.Int64, TypeCode.UInt64);
            number.AddSuffix("ul", TypeCode.UInt64);
            number.AddSuffix("f", TypeCode.Single);
            number.AddSuffix("d", TypeCode.Double);
            number.AddSuffix("m", TypeCode.Decimal);
            return number;
        }

        //http://www.microsoft.com/downloads/details.aspx?FamilyId=6D50D709-EAA4-44D7-8AF3-E14280403E6E&displaylang=en section 2
        public static NumberLiteral CreateVbNumber(string name)
        {
            var number = new NumberLiteral(name)
            {
                DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64 }
            };
            //DefaultFloatType is left untouched: TypeCode.Double is the default
            number.AddPrefix("&H", NumberOptions.Hex);
            number.AddPrefix("&O", NumberOptions.Octal);
            //type-character and letter suffixes
            number.AddSuffix("S", TypeCode.Int16);
            number.AddSuffix("I", TypeCode.Int32);
            number.AddSuffix("%", TypeCode.Int32);
            number.AddSuffix("L", TypeCode.Int64);
            number.AddSuffix("&", TypeCode.Int64);
            number.AddSuffix("D", TypeCode.Decimal);
            number.AddSuffix("@", TypeCode.Decimal);
            number.AddSuffix("F", TypeCode.Single);
            number.AddSuffix("!", TypeCode.Single);
            number.AddSuffix("R", TypeCode.Double);
            number.AddSuffix("#", TypeCode.Double);
            number.AddSuffix("US", TypeCode.UInt16);
            number.AddSuffix("UI", TypeCode.UInt32);
            number.AddSuffix("UL", TypeCode.UInt64);
            return number;
        }

        //http://docs.python.org/ref/numbers.html
        public static NumberLiteral CreatePythonNumber(string name)
        {
            var number = new NumberLiteral(name, NumberOptions.AllowStartEndDot)
            {
                //default int types are Integer (32bit) -> LongInteger (BigInt); Try Int64 before BigInt: Better performance?
                DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
            };
            // number.DefaultFloatType = TypeCode.Double; -- it is default
            //float type is implementation specific, thus try decimal first (higher precision)
            //number.DefaultFloatTypes = new TypeCode[] { TypeCode.Decimal, TypeCode.Double };
            number.AddPrefix("0x", NumberOptions.Hex);
            number.AddPrefix("0", NumberOptions.Octal);
            number.AddSuffix("L", TypeCode.Int64, NumberLiteral.TypeCodeBigInt);
            number.AddSuffix("J", NumberLiteral.TypeCodeImaginary);
            return number;
        }

        // About exponent symbols, extract from R6RS:
        //  ... representations of number objects may be written with an exponent marker that indicates the desired precision
        // of the inexact representation. The letters s, f, d, and l specify the use of short, single, double, and long precision, respectively.
        // ...
        // In addition, the exponent marker e specifies the default precision for the implementation. The default precision
        //  has at least as much precision as double, but implementations may wish to allow this default to be set by the user.
public static NumberLiteral CreateSchemeNumber(string name)
{
    NumberLiteral term = new NumberLiteral(name);
    term.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt };
    term.DefaultFloatType = TypeCode.Double; // it is default
    term.AddExponentSymbols("eE", TypeCode.Double); //default precision for platform, double
    term.AddExponentSymbols("sSfF", TypeCode.Single);
    term.AddExponentSymbols("dDlL", TypeCode.Double);
    term.AddPrefix("#b", NumberOptions.Binary);
    term.AddPrefix("#o", NumberOptions.Octal);
    term.AddPrefix("#x", NumberOptions.Hex);
    term.AddPrefix("#d", NumberOptions.None);
    term.AddPrefix("#i", NumberOptions.None); // inexact prefix, has no effect
    term.AddPrefix("#e", NumberOptions.None); // exact prefix, has no effect
    term.AddSuffix("J", NumberLiteral.TypeCodeImaginary);
    return term;
}

//Identifier that allows escapes (also as the first char); the "@" prefix marks it as not a keyword.
public static IdentifierTerminal CreateCSharpIdentifier(string name)
{
    IdentifierTerminal id = new IdentifierTerminal(name, IdOptions.AllowsEscapes | IdOptions.CanStartWithEscape);
    id.AddPrefix("@", IdOptions.IsNotKeyword);
    //From spec:
    //Start char is "_" or letter-character, which is a Unicode character of classes Lu, Ll, Lt, Lm, Lo, or Nl
    id.StartCharCategories.AddRange(new UnicodeCategory[] {
        UnicodeCategory.UppercaseLetter, //Lu
        UnicodeCategory.LowercaseLetter, //Ll
        UnicodeCategory.TitlecaseLetter, //Lt
        UnicodeCategory.ModifierLetter,  //Lm
        UnicodeCategory.OtherLetter,     //Lo
        UnicodeCategory.LetterNumber     //Nl
    });
    //Internal chars
    /* From spec:
    identifier-part-character: letter-character | decimal-digit-character | connecting-character |  combining-character |
        formatting-character
    */
    id.CharCategories.AddRange(id.StartCharCategories); //letter-character categories
    id.CharCategories.AddRange(new UnicodeCategory[] {
        UnicodeCategory.DecimalDigitNumber, //Nd
        UnicodeCategory.ConnectorPunctuation, //Pc
        UnicodeCategory.SpacingCombiningMark, //Mc
        UnicodeCategory.NonSpacingMark,       //Mn
        UnicodeCategory.Format                //Cf
    });
    //Chars to remove from final identifier
    id.CharsToRemoveCategories.Add(UnicodeCategory.Format);
    return id;
}

//Identifier terminal with default options.
public static IdentifierTerminal CreatePythonIdentifier(string name)
{
    //Use the caller-supplied name (it was ignored in favour of a hard-coded "Identifier"),
    // consistent with every other factory method in this class.
    IdentifierTerminal id = new IdentifierTerminal(name); //defaults are OK
    return id;
}

//Covers simple identifiers like abcd, and also quoted versions: [abc d], "abc d".
public static IdentifierTerminal CreateSqlExtIdentifier(Grammar grammar, string name)
{
    var id = new IdentifierTerminal(name);
    StringLiteral term = new StringLiteral(name + "_qouted");
    term.AddStartEnd("[", "]", StringOptions.NoEscapes);
    term.AddStartEnd("\"", StringOptions.NoEscapes);
    term.SetOutputTerminal(grammar, id); //term will be added to NonGrammarTerminals automatically
    return id;
}
}//class
}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/WikiTerminals/WikiBlockTerminal.cs b/src/Irony/Parsing/Terminals/WikiTerminals/WikiBlockTerminal.cs
new file mode 100644
index 0000000..ab10ce6
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/WikiTerminals/WikiBlockTerminal.cs
@@ -0,0 +1,44 @@
namespace Sanchime.Irony.Parsing.Terminals.WikiTerminals
{
    public enum WikiBlockType
    {
        EscapedText,
        CodeBlock,
        Anchor,
        LinkToAnchor,
        Url,
        FileLink, //looks like it is the same as Url
        Image,
    }

    //Matches a block delimited by OpenTag and CloseTag and produces a token whose value is the text between them.
    public class WikiBlockTerminal : WikiTerminalBase
    {
        public readonly WikiBlockType BlockType;

        public WikiBlockTerminal(string name, WikiBlockType blockType, string openTag, string closeTag, string htmlElementName)
            : base(name, WikiTermType.Block, openTag, closeTag, htmlElementName)
        {
            BlockType = blockType;
        }

        //Returns null when the input does not start with OpenTag. When CloseTag is not found,
        // the block runs to the end of the text.
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            if (!source.MatchSymbol(OpenTag)) return null;
            source.PreviewPosition += OpenTag.Length;
            var endPos = source.Text.IndexOf(CloseTag, source.PreviewPosition, StringComparison.Ordinal);
            string content;
            //IndexOf reports "not found" as -1, so any index >= 0 is a match
            if (endPos >= 0)
            {
                content = source.Text.Substring(source.PreviewPosition, endPos - source.PreviewPosition);
                source.PreviewPosition = endPos + CloseTag.Length;
            }
            else
            {
                content = source.Text.Substring(source.PreviewPosition, source.Text.Length - source.PreviewPosition);
                source.PreviewPosition = source.Text.Length;
            }
            var token = source.CreateToken(OutputTerminal, content);
            return token;
        }
    }//class
}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/WikiTerminals/WikiTagTerminal.cs b/src/Irony/Parsing/Terminals/WikiTerminals/WikiTagTerminal.cs
new file mode 100644
index 0000000..7713c78
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/WikiTerminals/WikiTagTerminal.cs
@@ -0,0 +1,32 @@
namespace Sanchime.Irony.Parsing.Terminals.WikiTerminals
{
    //Handles formatting tags like *bold*, _italic_; also handles headings and lists
    public class WikiTagTerminal : WikiTerminalBase
    {
        public WikiTagTerminal(string name, WikiTermType termType, string tag, string htmlElementName)
            : this(name, termType, tag, string.Empty, htmlElementName) { }

        public WikiTagTerminal(string name, WikiTermType termType, string openTag, string closeTag, string htmlElementName)
            : base(name, termType, openTag, closeTag, htmlElementName) { }

        //Matches OpenTag. Heading and list tags match only at the start of input or right after
        // a NewLine token, and must be followed by a whitespace character.
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            bool isHeadingOrList = TermType == WikiTermType.Heading || TermType == WikiTermType.List;
            if (isHeadingOrList)
            {
                bool isAfterNewLine = context.PreviousToken == null || context.PreviousToken.Terminal == Grammar.NewLine;
                if (!isAfterNewLine) return null;
            }
            if (!source.MatchSymbol(OpenTag)) return null;
            source.PreviewPosition += OpenTag.Length;
            //For headings and lists require space after
            if (isHeadingOrList)
            {
                const string whitespaces = " \t\r\n\v";
                if (!whitespaces.Contains(source.PreviewChar)) return null;
            }
            var token = source.CreateToken(OutputTerminal);
            return token;
        }
    }//class
}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/WikiTerminals/WikiTextTerminal.cs b/src/Irony/Parsing/Terminals/WikiTerminals/WikiTextTerminal.cs
new file mode 100644
index 0000000..e2a123a
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/WikiTerminals/WikiTextTerminal.cs
@@ -0,0 +1,55 @@
namespace Sanchime.Irony.Parsing.Terminals.WikiTerminals
{
    //Handles plain text
    public class WikiTextTerminal : WikiTerminalBase
    {
        public const char NoEscape = '\0';
        public char EscapeChar = NoEscape;
        private char[] _stopChars;

        public WikiTextTerminal(string name) : base(name, WikiTermType.Text, string.Empty, string.Empty, string.Empty)
        {
            Priority = TerminalPriority.Low;
        }

        //Builds the stop-character set: the first char of every non-empty "first" declared by any
        // terminal in the grammar, plus the escape char when one is configured.
        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            var stopCharSet = new CharHashSet();
            foreach (var term in grammarData.Terminals)
            {
                var firsts = term.GetFirsts();
                if (firsts == null) continue;
                foreach (var first in firsts)
                    if (!string.IsNullOrEmpty(first))
                        stopCharSet.Add(first[0]);
            }//foreach term
            if (EscapeChar != NoEscape)
                stopCharSet.Add(EscapeChar);
            _stopChars = stopCharSet.ToArray();
        }

        //override to WikiTerminalBase's method to return null, indicating there are no firsts, so it is a fallback terminal
        public override IList<string> GetFirsts()
        {
            return null;
        }

        //Produces either a token holding a single escaped char, or a text token running up to
        // the next stop char (or to the end of the text).
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            bool isEscape = source.PreviewChar == EscapeChar && EscapeChar != NoEscape;
            if (isEscape)
            {
                //return a token containing only escaped char
                var value = source.NextPreviewChar.ToString();
                source.PreviewPosition += 2;
                return source.CreateToken(OutputTerminal, value);
            }
            //The search starts one char past the token start, so the text token always takes at least one char
            var stopIndex = source.Text.IndexOfAny(_stopChars, source.Location.Position + 1);
            if (stopIndex == source.Location.Position) return null;
            if (stopIndex < 0) stopIndex = source.Text.Length;
            source.PreviewPosition = stopIndex;
            return source.CreateToken(OutputTerminal);
        }//method
    }//class
}
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/WikiTerminals/_WikiTerminalBase.cs b/src/Irony/Parsing/Terminals/WikiTerminals/_WikiTerminalBase.cs
new file mode 100644
index 0000000..ec4b27f
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/WikiTerminals/_WikiTerminalBase.cs
@@ -0,0 +1,51 @@
namespace Sanchime.Irony.Parsing.Terminals.WikiTerminals
{
    public enum WikiTermType
    {
        Text,
        Element,
        Format,
        Heading,
        List,
        Block,
        Table
    }

    //Base for wiki terminals: holds the wiki open/close tags and the HTML tags associated with them.
    public abstract class WikiTerminalBase : Terminal
    {
        public readonly WikiTermType TermType;
        public readonly string OpenTag, CloseTag;
        public string HtmlElementName, ContainerHtmlElementName;
        public string OpenHtmlTag, CloseHtmlTag;
        public string ContainerOpenHtmlTag, ContainerCloseHtmlTag;

        public WikiTerminalBase(string name, WikiTermType termType, string openTag, string closeTag, string htmlElementName) : base(name)
        {
            TermType = termType;
            OpenTag = openTag;
            CloseTag = closeTag;
            HtmlElementName = htmlElementName;
            Priority = TerminalPriority.Normal + OpenTag.Length; //longer tags have higher priority
        }

        public override IList<string> GetFirsts()
        {
            return new string[] { OpenTag };
        }

        //Fills in the HTML open/close tags from the element names, unless they were set explicitly.
        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            if (!string.IsNullOrEmpty(HtmlElementName))
            {
                if (string.IsNullOrEmpty(OpenHtmlTag)) OpenHtmlTag = "<" + HtmlElementName + ">";
                //closing tag mirrors the opening tag
                if (string.IsNullOrEmpty(CloseHtmlTag)) CloseHtmlTag = "</" + HtmlElementName + ">";
            }
            if (!string.IsNullOrEmpty(ContainerHtmlElementName))
            {
                if (string.IsNullOrEmpty(ContainerOpenHtmlTag)) ContainerOpenHtmlTag = "<" + ContainerHtmlElementName + ">";
                if (string.IsNullOrEmpty(ContainerCloseHtmlTag)) ContainerCloseHtmlTag = "</" + ContainerHtmlElementName + ">";
            }
        }
    }//class
}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/Terminals/_Terminal.cs b/src/Irony/Parsing/Terminals/_Terminal.cs
new
file mode 100644
index 0000000..9e73880
--- /dev/null
+++ b/src/Irony/Parsing/Terminals/_Terminal.cs
@@ -0,0 +1,179 @@
#region License

/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution.
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/

#endregion

namespace Sanchime.Irony.Parsing.Terminals
{
    public static class TerminalPriority
    {
        public static int Low = -1000;
        public static int Normal = 0;
        public static int High = 1000;
        public static int ReservedWords = 900;
    }

    public partial class Terminal : BnfTerm
    {
        #region Constructors

        public Terminal(string name) : this(name, TokenCategory.Content, TermFlags.None)
        {
        }

        public Terminal(string name, TokenCategory category) : this(name, category, TermFlags.None)
        {
        }

        public Terminal(string name, string errorAlias, TokenCategory category, TermFlags flags) : this(name, category, flags)
        {
            ErrorAlias = errorAlias;
        }

        //Terminals of the Outline category are flagged as punctuation. OutputTerminal starts as the terminal itself.
        public Terminal(string name, TokenCategory category, TermFlags flags) : base(name)
        {
            Category = category;
            Flags |= flags;
            if (Category == TokenCategory.Outline)
                SetFlag(TermFlags.IsPunctuation);
            OutputTerminal = this;
        }

        #endregion

        #region fields and properties

        public TokenCategory Category = TokenCategory.Content;

        // Priority is used when more than one terminal may match the input char.
        // It determines the order in which terminals will try to match input for a given char in the input.
        // For a given input char the scanner uses the hash table to look up the collection of terminals that may match this input symbol.
        // It is the order in this collection that is determined by Priority property - the higher the priority,
        // the earlier the terminal gets a chance to check the input.
        public int Priority = TerminalPriority.Normal; //default is 0

        //Terminal to attach to the output token. By default is set to the Terminal itself
        // Use SetOutputTerminal method to change it. For example of use see TerminalFactory.CreateSqlExtIdentifier and sample SQL grammar
        public Terminal OutputTerminal { get; protected set; }

        public TokenEditorInfo EditorInfo;
        public byte MultilineIndex;
        public Terminal IsPairFor;

        #endregion

        #region virtual methods: GetFirsts(), TryMatch, Init, TokenToString

        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
        }

        //"Firsts" (chars) collections are used for quick search for possible matching terminal(s) using current character in the input stream.
        // A terminal might declare no firsts. In this case, the terminal is tried for match for any current input character.
        public virtual IList<string> GetFirsts()
        {
            return null;
        }

        //Base implementation matches nothing.
        public virtual Token TryMatch(ParsingContext context, ISourceStream source)
        {
            return null;
        }

        //Returns the token's value string alone when it equals the terminal name;
        // otherwise the value string (or the token text when the value string is null) followed by " (Name)".
        public virtual string TokenToString(Token token)
        {
            if (token.ValueString == Name)
                return token.ValueString;
            else
                return (token.ValueString ?? token.Text) + " (" + Name + ")";
        }

        #endregion

        #region Events: ValidateToken, ParserInputPreview

        //NOTE(review): the event-args type arguments are inferred from the Shared...EventArgs property names - confirm against the declaring types
        public event EventHandler<ValidateTokenEventArgs> ValidateToken;

        protected internal virtual void OnValidateToken(ParsingContext context)
        {
            ValidateToken?.Invoke(this, context.SharedValidateTokenEventArgs);
        }

        //Invoked when ParseTreeNode is created from the token. This is parser-preview event, when parser
        // just received the token, wrapped it into ParseTreeNode and is about to look at it.
        public event EventHandler<ParsingEventArgs> ParserInputPreview;

        protected internal virtual void OnParserInputPreview(ParsingContext context)
        {
            ParserInputPreview?.Invoke(this, context.SharedParsingEventArgs);
        }

        #endregion

        #region static comparison methods

        //Comparison that orders terminals by descending Priority.
        public static int ByPriorityReverse(Terminal x, Terminal y)
        {
            if (x.Priority > y.Priority)
                return -1;
            if (x.Priority == y.Priority)
                return 0;
            return 1;
        }

        #endregion

        #region Miscellaneous: SetOutputTerminal

        //Redirects output tokens to another terminal and registers this terminal in Grammar.NonGrammarTerminals.
        public void SetOutputTerminal(Grammar grammar, Terminal outputTerminal)
        {
            OutputTerminal = outputTerminal;
            grammar.NonGrammarTerminals.Add(this);
        }

        #endregion

        //Priority constants
        [Obsolete("Deprecated: use constants in TerminalPriority class instead")]
        public const int LowestPriority = -1000;

        [Obsolete("Deprecated: use constants in TerminalPriority class instead")]
        public const int HighestPriority = 1000;

        [Obsolete("Deprecated: use constants in TerminalPriority class instead")]
        public const int ReservedWordsPriority = 900; //almost top one

        //Joins the string forms of the terminals with single spaces.
        public static string TerminalsToString(IEnumerable<Terminal> terminals)
        {
            return string.Join(" ", terminals);
        }
    }//class

    public class TerminalSet : HashSet<Terminal>
    {
        public override string ToString()
        {
            return Terminal.TerminalsToString(this);
        }
    }

    public class TerminalList : List<Terminal>
    {
        public override string ToString()
        {
            return Terminal.TerminalsToString(this);
        }
    }
}//namespace
\ No newline at end of file
diff --git a/src/Irony/Parsing/TokenFilters/CodeOutlineFilter.cs b/src/Irony/Parsing/TokenFilters/CodeOutlineFilter.cs
new file mode 100644
index 0000000..edc20b5
--- /dev/null
+++ b/src/Irony/Parsing/TokenFilters/CodeOutlineFilter.cs
@@ -0,0 +1,220 @@
namespace Sanchime.Irony.Parsing.TokenFilters
{
    [Flags]
    public enum OutlineOptions
    {
        None = 0,
        ProduceIndents = 0x01,
        CheckBraces = 0x02,
        CheckOperator = 0x04, //to implement, auto line joining if line ends with operator
    }

    //Token filter that reacts to LineStart and Eof tokens and emits Eos, Indent and Dedent outline tokens.
    public class CodeOutlineFilter : TokenFilter
    {
        public readonly OutlineOptions Options;
        public readonly KeyTerm ContinuationTerminal; //Terminal

        private GrammarData _grammarData;
        private Grammar _grammar;
        private ParsingContext _context;
        private bool _produceIndents;
        private bool _checkBraces, _checkOperator;

        public Stack<int> Indents = new Stack<int>();
        public Token CurrentToken;
        public Token PreviousToken;
        public SourceLocation PreviousTokenLocation;
        public TokenStack OutputTokens = new TokenStack();
        private bool _isContinuation, _prevIsContinuation;
        private bool _isOperator, _prevIsOperator;
        private bool _doubleEof;

        #region constructor

        //Sets LanguageFlags.EmitLineStartToken on the grammar; adds a grammar warning when the
        // continuation terminal is not listed in Grammar.NonGrammarTerminals.
        public CodeOutlineFilter(GrammarData grammarData, OutlineOptions options, KeyTerm continuationTerminal)
        {
            _grammarData = grammarData;
            _grammar = grammarData.Grammar;
            _grammar.LanguageFlags |= LanguageFlags.EmitLineStartToken;
            Options = options;
            ContinuationTerminal = continuationTerminal;
            if (ContinuationTerminal != null && !_grammar.NonGrammarTerminals.Contains(ContinuationTerminal))
                _grammarData.Language.Errors.Add(GrammarErrorLevel.Warning, null, Resources.ErrOutlineFilterContSymbol, ContinuationTerminal.Name);
            //"CodeOutlineFilter: line continuation symbol '{0}' should be added to Grammar.NonGrammarTerminals list.",
            _produceIndents = OptionIsSet(OutlineOptions.ProduceIndents);
            _checkBraces = OptionIsSet(OutlineOptions.CheckBraces);
            _checkOperator = OptionIsSet(OutlineOptions.CheckOperator);
            Reset();
        }

        #endregion constructor

        //Clears the filter state; the indent stack is re-seeded with level 0.
        public override void Reset()
        {
            base.Reset();
            Indents.Clear();
            Indents.Push(0);
            OutputTokens.Clear();
            PreviousToken = null;
            CurrentToken = null;
            PreviousTokenLocation = new SourceLocation();
        }

        public bool OptionIsSet(OutlineOptions option)
        {
            return (Options & option) != 0;
        }

        //Feeds every input token through ProcessToken and yields whatever accumulated in OutputTokens.
        public override IEnumerable<Token> BeginFiltering(ParsingContext context, IEnumerable<Token> tokens)
        {
            _context = context;
            foreach (Token token in tokens)
            {
                ProcessToken(token);
                while (OutputTokens.Count > 0)
                    yield return OutputTokens.Pop();
            }//foreach
        }//method

        public void ProcessToken(Token token)
        {
            SetCurrentToken(token);
            //Quick checks
            if (_isContinuation)
                return;
            var tokenTerm = token.Terminal;

            //check EOF
            if (tokenTerm == _grammar.Eof)
            {
                ProcessEofToken();
                return;
            }

            if (tokenTerm != _grammar.LineStartTerminal) return;
            //if we are here, we have LineStart token on new line; first remove it from stream, it should not go to parser
            OutputTokens.Pop();

            if (PreviousToken == null) return;

            // first check if there was continuation symbol before
            // or - if checkBraces flag is set - check if there were open braces
            if (_prevIsContinuation || _checkBraces && _context.OpenBraces.Count > 0)
                return; //no Eos token in this case
            if (_prevIsOperator && _checkOperator)
                return; //no Eos token in this case

            //We need to produce Eos token and indents (if _produceIndents is set).
            // First check indents - they go first into OutputTokens stack, so they will be popped out last
            if (_produceIndents)
            {
                var currIndent = token.Location.Column;
                var prevIndent = Indents.Peek();
                if (currIndent > prevIndent)
                {
                    Indents.Push(currIndent);
                    PushOutlineToken(_grammar.Indent, token.Location);
                }
                else if (currIndent < prevIndent)
                {
                    PushDedents(currIndent);
                    //check that current indent exactly matches the previous indent
                    if (Indents.Peek() != currIndent)
                    {
                        //fire error
                        OutputTokens.Push(new Token(_grammar.SyntaxError, token.Location, string.Empty, Resources.ErrInvDedent));
                        // "Invalid dedent level, no previous matching indent found."
                    }
                }
            }//if _produceIndents
            //Finally produce Eos token, but not in command line mode. In command line mode the Eos was already produced
            // when we encountered Eof on previous line
            if (_context.Mode != ParseMode.CommandLine)
            {
                var eosLocation = ComputeEosLocation();
                PushOutlineToken(_grammar.Eos, eosLocation);
            }
        }//method

        //Makes "token" the current token, remembers the previous content token and its
        // continuation/operator state, and pushes the token to the output unless it is the continuation symbol.
        private void SetCurrentToken(Token token)
        {
            _doubleEof = CurrentToken != null && CurrentToken.Terminal == _grammar.Eof
                        && token.Terminal == _grammar.Eof;
            //Copy CurrentToken to PreviousToken
            if (CurrentToken != null && CurrentToken.Category == TokenCategory.Content)
            { //remember only content tokens
                PreviousToken = CurrentToken;
                _prevIsContinuation = _isContinuation;
                _prevIsOperator = _isOperator;
                //PreviousToken was just assigned from a non-null CurrentToken, so no null check is needed
                PreviousTokenLocation = PreviousToken.Location;
            }
            CurrentToken = token;
            _isContinuation = token.Terminal == ContinuationTerminal && ContinuationTerminal != null;
            _isOperator = token.Terminal.Flags.IsSet(TermFlags.IsOperator);
            if (!_isContinuation)
                OutputTokens.Push(token); //by default input token goes to output, except continuation symbol
        }

        //Processes Eof token. We should take into account the special case of processing command line input.
        // In this case we should not automatically dedent all stacked indents if we get EOF.
        // Note that tokens will be popped from the OutputTokens stack and sent to parser in the reverse order compared to
        // the order we pushed them into OutputTokens stack. We have Eof already in stack; we first push dedents, then Eos
        // They will come out to parser in the following order: Eos, Dedents, Eof.
+        private void ProcessEofToken()
+        {
+            var mode = _context.Mode;
+            var commandLine = mode == ParseMode.CommandLine;
+
+            //File mode: unwind all stacked indents, provided the filter tracks indents at all.
+            //Command line mode: unwind only on a second Eof in a row (the user entered an empty line).
+            //Any other mode, VsLineScan included: never unwind at the end of input.
+            bool unwindIndents = mode == ParseMode.File
+                ? _produceIndents
+                : commandLine && _produceIndents && _doubleEof;
+
+            //Eos is produced in every mode; command line mode alone suppresses it after a continuation
+            // symbol and on a repeated Eof.
+            bool emitEos = !commandLine || !(_prevIsContinuation || _doubleEof);
+
+            //Push order is the reverse of delivery order: dedents first, Eos last, so Eos is popped first,
+            // then the dedents, then the Eof token that is already in the stack.
+            if (unwindIndents)
+            {
+                PushDedents(0);
+            }
+            if (emitEos)
+            {
+                PushOutlineToken(_grammar.Eos, ComputeEosLocation());
+            }
+        }
+
+        //Pops every indent level greater than untilPosition and queues one Dedent token per popped level,
+        // each located at the current token.
+        // NOTE(review): Peek is called before every iteration, so this presumably relies on a base level
+        // that is never greater than untilPosition staying on the Indents stack - confirm where Indents is initialized.
+        private void PushDedents(int untilPosition)
+        {
+            while (untilPosition < Indents.Peek())
+            {
+                Indents.Pop();
+                PushOutlineToken(_grammar.Dedent, CurrentToken.Location);
+            }
+        }
+
+        //Location for an Eos token: same line as PreviousToken, with position and column advanced by that
+        // token's length. Without a previous token a default SourceLocation is returned.
+        private SourceLocation ComputeEosLocation()
+        {
+            var last = PreviousToken;
+            if (last == null)
+            {
+                return new SourceLocation();
+            }
+            var start = last.Location;
+            var length = last.Length;
+            return new SourceLocation(start.Position + length, start.Line, start.Column + length);
+        }
+
+        //Creates a token for the given outline terminal at the given location (string.Empty and null are
+        // passed as the remaining constructor arguments) and pushes it to OutputTokens.
+        private void PushOutlineToken(Terminal term, SourceLocation location)
+        {
+            var outlineToken = new Token(term, location, string.Empty, null);
+            OutputTokens.Push(outlineToken);
+        }
+ }//class +}//namespace \ No newline at end of file diff --git a/src/Irony/Parsing/TokenFilters/TokenFilter.cs b/src/Irony/Parsing/TokenFilters/TokenFilter.cs new file mode 100644 index 0000000..957fa56 --- /dev/null +++ b/src/Irony/Parsing/TokenFilters/TokenFilter.cs @@ -0,0 +1,60 @@ +#region License + +/* ********************************************************************************** + * Copyright (c) Roman Ivantsov + * This
source code is subject to terms and conditions of the MIT License + * for Irony. A copy of the license can be found in the License.txt file + * at the root of this distribution. + * By using this source code in any fashion, you are agreeing to be bound by the terms of the + * MIT License. + * You must not remove this notice from this software. + * **********************************************************************************/ + +#endregion + +namespace Sanchime.Irony.Parsing.TokenFilters +{ + #region Comments + + // Token filter is a token preprocessor that operates on a token stream between scanner and parser: + // scanner -> (token filters)-> parser + // Token stream from scanner output is fed into a chain of token filters that add/remove/modify tokens + // in the stream before it gets to the parser. Some tasks that come up in scanning and parsing are best + // handled by such an intermediate processor. Examples: + // * Macro expansion + // * Conditional compilation clauses + // * Handling commented-out blocks. Scheme allows commenting out entire blocks of code using "#;" prefix followed by + // well-formed datum. This type of comments cannot be handled by scanner as it requires parser-like processing + // of the stream to locate the end of the block. At the same time parser is not a good place to handle this either, + // as it would require defining optional "commented block" element everywhere in the grammar. + // Token filter is an ideal place for implementing this task - after scanning but before parsing. + // * Assembling doc-comment blocks (XML doc lines in c#) from individual comment lines + // and attaching it to the next content token, and later sticking it to the parsed node. + // * Handling newlines, indents and unindents for languages like Python. + // Tracking this information directly in the scanner makes things really messy, and it does not fit well + // into general-purpose scanner. Token filter can handle it easily. 
In this case the scanner + // handles the new-line character and indentations as whitespace and simply ignores it. + // The CodeOutlineFilter re-creates new-line and indent tokens by analyzing + // the line/column properties of the incoming tokens, and inserts them into its output. + + #endregion
+
+    // Base class for token filters. All members are virtual and do nothing by default;
+    // a concrete filter overrides the ones it needs.
+    public class TokenFilter
+    {
+        // Default implementation: ignores both arguments and produces an empty sequence
+        // (the incoming tokens are NOT passed through).
+        // NOTE(review): IEnumerable appears here without a type argument - presumably the generic
+        // argument was lost when this text was extracted; verify against the real source file.
+        public virtual IEnumerable BeginFiltering(ParsingContext context, IEnumerable tokens)
+        {
+            yield break;
+        }
+
+        // Does nothing by default.
+        public virtual void Reset()
+        {
+        }
+
+        // Does nothing by default; the location argument is ignored.
+        // NOTE(review): by its name this looks like a notification that the source position was changed -
+        // confirm against the caller, which is not visible here.
+        protected internal virtual void OnSetSourceLocation(SourceLocation location)
+        {
+        }
+    }//class
+
+    // List of token filters; adds no members of its own.
+    // NOTE(review): the base type reads as bare "List" - presumably a stripped generic argument, to be confirmed.
+    public class TokenFilterList : List
+    { }
+}//namespace \ No newline at end of file diff --git a/src/Irony/Resources.Designer.cs b/src/Irony/Resources.Designer.cs new file mode 100644 index 0000000..98888dc --- /dev/null +++ b/src/Irony/Resources.Designer.cs @@ -0,0 +1,1070 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Runtime Version:4.0.30319.42000 +// +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +//------------------------------------------------------------------------------ + +namespace Sanchime.Irony +{ + using System.Reflection; + + + /// + /// A strongly-typed resource class, for looking up localized strings, etc. + /// + // This class was auto-generated by the StronglyTypedResourceBuilder + // class via a tool like ResGen or Visual Studio. + // To add or remove a member, edit your .ResX file then rerun ResGen + // with the /str option, or rebuild your VS project.
+ [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + public class Resources { + + private static global::System.Resources.ResourceManager resourceMan; + + private static global::System.Globalization.CultureInfo resourceCulture; + + internal Resources() { + } + + /// + /// Returns the cached ResourceManager instance used by this class. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + public static global::System.Resources.ResourceManager ResourceManager { + get { + if (object.ReferenceEquals(resourceMan, null)) { + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Sanchime.Irony.Resources", typeof(Resources).GetTypeInfo().Assembly); + resourceMan = temp; + } + return resourceMan; + } + } + + /// + /// Overrides the current thread's CurrentUICulture property for all + /// resource lookups using this strongly typed resource class. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + public static global::System.Globalization.CultureInfo Culture { + get { + return resourceCulture; + } + set { + resourceCulture = value; + } + } + + /// + /// Looks up a localized string similar to Nn. + /// + public static string ConsoleNoChars { + get { + return ResourceManager.GetString("ConsoleNoChars", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Yy. + /// + public static string ConsoleYesChars { + get { + return ResourceManager.GetString("ConsoleYesChars", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Ambiguous grammar, unresolvable reduce-reduce conflicts. State {0}, lookaheads [{1}]. 
+ /// + public static string ErrAmbigGrammarRR { + get { + return ResourceManager.GetString("ErrAmbigGrammarRR", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Ambiguous grammar, unresolvable shift-reduce conflicts. State {0}, lookaheads [{1}]. + /// + public static string ErrAmbigGrammarSR { + get { + return ResourceManager.GetString("ErrAmbigGrammarSR", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Argument list not found in the stack. Expected: ValueList, found: {0}.. + /// + public static string ErrArgListNotFound { + get { + return ResourceManager.GetString("ErrArgListNotFound", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid operation, attempt to assign to a constant or literal value.. + /// + public static string ErrAssignLiteralValue { + get { + return ResourceManager.GetString("ErrAssignLiteralValue", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid length of char literal - should be a single character.. + /// + public static string ErrBadChar { + get { + return ResourceManager.GetString("ErrBadChar", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Mal-formed string literal - cannot find termination symbol.. + /// + public static string ErrBadStrLiteral { + get { + return ResourceManager.GetString("ErrBadStrLiteral", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid unicode escape ({0}), expected {1} hex digits.. + /// + public static string ErrBadUnEscape { + get { + return ResourceManager.GetString("ErrBadUnEscape", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid \x escape, at least one digit expected.. 
+ /// + public static string ErrBadXEscape { + get { + return ResourceManager.GetString("ErrBadXEscape", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Cannot convert value from type {0} to type {1}, type converter not defined.. + /// + public static string ErrCannotConvertValue { + get { + return ResourceManager.GetString("ErrCannotConvertValue", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Cannot convert literal {0} to type {1}.. + /// + public static string ErrCannotConvertValueToType { + get { + return ResourceManager.GetString("ErrCannotConvertValueToType", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Fatal error in parser: attempt to move back in the source.. + /// + public static string ErrCannotMoveBackInSource { + get { + return ResourceManager.GetString("ErrCannotMoveBackInSource", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to {0} State {1} on inputs: {2}. + /// + public static string ErrConflictMsgTemplate { + get { + return ResourceManager.GetString("ErrConflictMsgTemplate", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Fatal error:. + /// + public static string ErrConsoleFatalError { + get { + return ResourceManager.GetString("ErrConsoleFatalError", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Construct '{0}' is not supported (yet) by language implementation.. + /// + public static string ErrConstructNotSupported { + get { + return ResourceManager.GetString("ErrConstructNotSupported", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Could not find a closing quote for quoted value.. + /// + public static string ErrDsvNoClosingQuote { + get { + return ResourceManager.GetString("ErrDsvNoClosingQuote", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Duplicate switch '{0}' for regular expression.. 
+ /// + public static string ErrDupRegexSwitch { + get { + return ResourceManager.GetString("ErrDupRegexSwitch", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Duplicate start symbol {0} in string literal [{1}].. + /// + public static string ErrDupStartSymbolStr { + get { + return ResourceManager.GetString("ErrDupStartSymbolStr", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Failed to create AST node for non-terminal [{0}], error: {1}. + /// + public static string ErrFailedCreateNode { + get { + return ResourceManager.GetString("ErrFailedCreateNode", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Failed to find the ending tag '{0}' for a text literal. . + /// + public static string ErrFreeTextNoEndTag { + get { + return ResourceManager.GetString("ErrFreeTextNoEndTag", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to ImpliedSymbolTerminal cannot be used in grammar with DisableScannerParserLink flag set. + /// + public static string ErrImpliedOpUseParserLink { + get { + return ResourceManager.GetString("ErrImpliedOpUseParserLink", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Interpreter error, DataStack.Pop() operation failed - stack is empty.. + /// + public static string ErrInternalErrDataPopFailed { + get { + return ResourceManager.GetString("ErrInternalErrDataPopFailed", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Interpreter is busy.. + /// + public static string ErrInterpreterIsBusy { + get { + return ResourceManager.GetString("ErrInterpreterIsBusy", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid arguments for IncDecNode AST node: either first or second argument should be '--' or '++'.. 
+ /// + public static string ErrInvalidArgsForIncDec { + get { + return ResourceManager.GetString("ErrInvalidArgsForIncDec", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid AstMode value in call to Evaluate method. Node: {0}, mode: {1}.. + /// + public static string ErrInvalidAstMode { + get { + return ResourceManager.GetString("ErrInvalidAstMode", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid character: '{0}'.. + /// + public static string ErrInvalidChar { + get { + return ResourceManager.GetString("ErrInvalidChar", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid embedded expression. . + /// + public static string ErrInvalidEmbeddedPrefix { + get { + return ResourceManager.GetString("ErrInvalidEmbeddedPrefix", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid dedent level, no previous matching indent found.. + /// + public static string ErrInvDedent { + get { + return ResourceManager.GetString("ErrInvDedent", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid escape sequence: \{0}.. + /// + public static string ErrInvEscape { + get { + return ResourceManager.GetString("ErrInvEscape", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid escape sequence.. + /// + public static string ErrInvEscSeq { + get { + return ResourceManager.GetString("ErrInvEscSeq", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid escape symbol, expected 'u' or 'U' only.. + /// + public static string ErrInvEscSymbol { + get { + return ResourceManager.GetString("ErrInvEscSymbol", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid number.. 
+ /// + public static string ErrInvNumber { + get { + return ResourceManager.GetString("ErrInvNumber", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid switch '{0}' for regular expression. + /// + public static string ErrInvRegexSwitch { + get { + return ResourceManager.GetString("ErrInvRegexSwitch", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Error in string literal [{0}]: No start/end symbols specified.. + /// + public static string ErrInvStrDef { + get { + return ResourceManager.GetString("ErrInvStrDef", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to The last term of production containing SyntaxError must be a terminal. NonTerminal: {0}. + /// + public static string ErrLastTermOfErrorProd { + get { + return ResourceManager.GetString("ErrLastTermOfErrorProd", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to List non-terminals cannot be marked transient; list: ({0}). + /// + public static string ErrListCannotBeTransient { + get { + return ResourceManager.GetString("ErrListCannotBeTransient", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Expected new line symbol.. + /// + public static string ErrNewLineExpected { + get { + return ResourceManager.GetString("ErrNewLineExpected", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to No closing pair for opening symbol {0}. + /// + public static string ErrNoClosingBrace { + get { + return ResourceManager.GetString("ErrNoClosingBrace", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to AstNodeType or AstNodeCreator is not set on non-terminals: {0}. Either set Term.AstConfig.NodeType, or provide default values in AstContext.. 
+ /// + public static string ErrNodeTypeNotSetOn { + get { + return ResourceManager.GetString("ErrNodeTypeNotSetOn", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to No end symbol for regex literal.. + /// + public static string ErrNoEndForRegex { + get { + return ResourceManager.GetString("ErrNoEndForRegex", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to No ending tag '{0}' found in embedded expression.. + /// + public static string ErrNoEndTagInEmbExpr { + get { + return ResourceManager.GetString("ErrNoEndTagInEmbExpr", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to UnExprNode: no implementation for unary operator '{0}'.. + /// + public static string ErrNoImplForUnaryOp { + get { + return ResourceManager.GetString("ErrNoImplForUnaryOp", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Number cannot be followed by a letter.. + /// + public static string ErrNoLetterAfterNum { + get { + return ResourceManager.GetString("ErrNoLetterAfterNum", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to ParserDataBuilder error: inadequate state {0}, reduce item '{1}' has no lookaheads.. + /// + public static string ErrNoLkhds { + get { + return ResourceManager.GetString("ErrNoLkhds", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Non-terminal {0} has uninitialized Rule property.. + /// + public static string ErrNtRuleIsNull { + get { + return ResourceManager.GetString("ErrNtRuleIsNull", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Attempt to evaluate NULL AST node. The AST node for term '{0}' was not created during parsing.. + /// + public static string ErrNullNodeEval { + get { + return ResourceManager.GetString("ErrNullNodeEval", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Operator '{0}' is not defined for types {1}.. 
+ /// + public static string ErrOpNotDefinedForType { + get { + return ResourceManager.GetString("ErrOpNotDefinedForType", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Operator '{0}' is not defined for types {1} and {2}.. + /// + public static string ErrOpNotDefinedForTypes { + get { + return ResourceManager.GetString("ErrOpNotDefinedForTypes", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Operator '{0} not imlemented.. + /// + public static string ErrOpNotImplemented { + get { + return ResourceManager.GetString("ErrOpNotImplemented", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to {0}: {1}. + /// + public static string ErrOutErrorPrintFormat { + get { + return ResourceManager.GetString("ErrOutErrorPrintFormat", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to CodeOutlineFilter: line continuation symbol '{0}' should be added to Grammar.NonGrammarTerminals list.. + /// + public static string ErrOutlineFilterContSymbol { + get { + return ResourceManager.GetString("ErrOutlineFilterContSymbol", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Syntax error, unexpected input.. + /// + public static string ErrParserUnexpectedInput { + get { + return ResourceManager.GetString("ErrParserUnexpectedInput", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Parsed tree is null, cannot evaluate.. + /// + public static string ErrParseTreeNull { + get { + return ResourceManager.GetString("ErrParseTreeNull", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Parse tree root is null, cannot evaluate.. + /// + public static string ErrParseTreeRootNull { + get { + return ResourceManager.GetString("ErrParseTreeRootNull", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Root AST node is null, cannot evaluate.. 
+ /// + public static string ErrRootAstNodeNull { + get { + return ResourceManager.GetString("ErrRootAstNodeNull", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Root AST node does not implement IInterpretedAstNode interface, cannot evaluate.. + /// + public static string ErrRootAstNoInterface { + get { + return ResourceManager.GetString("ErrRootAstNoInterface", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to ({0}) term passed as 'root' paramater to parserr is not Root or snippet root of the grammar. Add it to SnippetRoots set in grammar constructor.. + /// + public static string ErrRootNotRegistered { + get { + return ResourceManager.GetString("ErrRootNotRegistered", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Root property of the grammar is not set.. + /// + public static string ErrRootNotSet { + get { + return ResourceManager.GetString("ErrRootNotSet", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Reduce-reduce conflict. State {0}, lookaheads: {1}. Selected reduce on first production in conflict set.. + /// + public static string ErrRRConflict { + get { + return ResourceManager.GetString("ErrRRConflict", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Rule for NonTerminal {0} contains null as an operand in position {1} in one of productions.. + /// + public static string ErrRuleContainsNull { + get { + return ResourceManager.GetString("ErrRuleContainsNull", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Shift-reduce conflict. State {0}, lookaheads [{1}]. Selected shift as preferred action.. + /// + public static string ErrSRConflict { + get { + return ResourceManager.GetString("ErrSRConflict", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Syntax error, expected: {0}. 
+ /// + public static string ErrSyntaxErrorExpected { + get { + return ResourceManager.GetString("ErrSyntaxErrorExpected", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Syntax error.. + /// + public static string ErrSyntaxErrorNoInfo { + get { + return ResourceManager.GetString("ErrSyntaxErrorNoInfo", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Expression root non-terminal in template settings (AstNodeConfig property) in templated string literal [{0}] is not added to Roots set. Add it to SnippetRoots in grammar constructor.. + /// + public static string ErrTemplExprNotRoot { + get { + return ResourceManager.GetString("ErrTemplExprNotRoot", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Expression root is not specified in template settings (AstNodeConfig property) in templated string literal [{0}]. . + /// + public static string ErrTemplMissingExprRoot { + get { + return ResourceManager.GetString("ErrTemplMissingExprRoot", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Error in string literal [{0}]: IsTemplate flag is set, but TemplateSettings is not provided in AstNodeConfig property.. + /// + public static string ErrTemplNoSettings { + get { + return ResourceManager.GetString("ErrTemplNoSettings", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to A terminal {0} has empty prefix.. + /// + public static string ErrTerminalHasEmptyPrefix { + get { + return ResourceManager.GetString("ErrTerminalHasEmptyPrefix", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Transient non-terminal must have zero or one non-punctuation child nodes; non-terminals: {0}.. 
+ /// + public static string ErrTransientNtMustHaveOneTerm { + get { + return ResourceManager.GetString("ErrTransientNtMustHaveOneTerm", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Unclosed comment block. + /// + public static string ErrUnclosedComment { + get { + return ResourceManager.GetString("ErrUnclosedComment", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Unexpected end of file.. + /// + public static string ErrUnexpEof { + get { + return ResourceManager.GetString("ErrUnexpEof", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Unexpected indentation.. + /// + public static string ErrUnexpIndent { + get { + return ResourceManager.GetString("ErrUnexpIndent", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Unmatched closing brace '{0}'.. + /// + public static string ErrUnmatchedCloseBrace { + get { + return ResourceManager.GetString("ErrUnmatchedCloseBrace", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Variable {0} is not a callable function.. + /// + public static string ErrVarIsNotCallable { + get { + return ResourceManager.GetString("ErrVarIsNotCallable", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Variable {0} not defined.. + /// + public static string ErrVarNotDefined { + get { + return ResourceManager.GetString("ErrVarNotDefined", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Invalid number of arguments. Expected {0}, found {1}.. + /// + public static string ErrWrongArgCount { + get { + return ResourceManager.GetString("ErrWrongArgCount", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to ({0}:{1}). + /// + public static string FmtRowCol { + get { + return ResourceManager.GetString("FmtRowCol", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Accept. 
+ /// + public static string LabelActionAccept { + get { + return ResourceManager.GetString("LabelActionAccept", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Operator, shift to {0}/reduce on {1}.. + /// + public static string LabelActionOp { + get { + return ResourceManager.GetString("LabelActionOp", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Reduce on {0}. + /// + public static string LabelActionReduce { + get { + return ResourceManager.GetString("LabelActionReduce", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Shift to {0}. + /// + public static string LabelActionShift { + get { + return ResourceManager.GetString("LabelActionShift", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to (Unknown action type). + /// + public static string LabelActionUnknown { + get { + return ResourceManager.GetString("LabelActionUnknown", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to (EOF). + /// + public static string LabelEofMark { + get { + return ResourceManager.GetString("LabelEofMark", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to [end-of-statement]. + /// + public static string LabelEosLabel { + get { + return ResourceManager.GetString("LabelEosLabel", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to (INITIAL STATE). + /// + public static string LabelInitialState { + get { + return ResourceManager.GetString("LabelInitialState", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to (Key symbol). + /// + public static string LabelKeySymbol { + get { + return ResourceManager.GetString("LabelKeySymbol", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to (Keyword). 
+ /// + public static string LabelKeyword { + get { + return ResourceManager.GetString("LabelKeyword", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to [line break]. + /// + public static string LabelLineBreak { + get { + return ResourceManager.GetString("LabelLineBreak", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Location:. + /// + public static string LabelLocation { + get { + return ResourceManager.GetString("LabelLocation", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to None. + /// + public static string LabelNone { + get { + return ResourceManager.GetString("LabelNone", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to .... + /// + public static string LabelSrcHaveMore { + get { + return ResourceManager.GetString("LabelSrcHaveMore", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to (unnamed). + /// + public static string LabelUnnamed { + get { + return ResourceManager.GetString("LabelUnnamed", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Abort script(y/n)?. + /// + public static string MsgAbortScriptYN { + get { + return ResourceManager.GetString("MsgAbortScriptYN", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to {0} Console.\r\nPress Ctrl-C to exit the program.\r\n. + /// + public static string MsgDefaultConsoleGreeting { + get { + return ResourceManager.GetString("MsgDefaultConsoleGreeting", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Console. + /// + public static string MsgDefaultConsoleTitle { + get { + return ResourceManager.GetString("MsgDefaultConsoleTitle", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Custom action did not execute: parser state or input did not change.. 
+ /// + public static string MsgErrorCustomActionDidNotAdvance { + get { + return ResourceManager.GetString("MsgErrorCustomActionDidNotAdvance", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Exit console (y/n)?. + /// + public static string MsgExitConsoleYN { + get { + return ResourceManager.GetString("MsgExitConsoleYN", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to NLALR transform: Add WrapTail() in '.' position to [{0}].. + /// + public static string MsgNLALRAdvice { + get { + return ResourceManager.GetString("MsgNLALRAdvice", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Press any key to end the program.. + /// + public static string MsgPressAnyKeyToExit { + get { + return ResourceManager.GetString("MsgPressAnyKeyToExit", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to "[{0}], at {1}. + /// + public static string MsgSrcPosToString { + get { + return ResourceManager.GetString("MsgSrcPosToString", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Parsing conflict resolved in code.. + /// + public static string MsgTraceConflictResolved { + get { + return ResourceManager.GetString("MsgTraceConflictResolved", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Executing custom action. + /// + public static string MsgTraceExecCustomAction { + get { + return ResourceManager.GetString("MsgTraceExecCustomAction", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Operator - resolved to {0}. + /// + public static string MsgTraceOpResolved { + get { + return ResourceManager.GetString("MsgTraceOpResolved", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Popped state from stack, pushing {0}. 
+ /// + public static string MsgTracePoppedState { + get { + return ResourceManager.GetString("MsgTracePoppedState", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to RECOVERING: {0}. + /// + public static string MsgTraceRecoverAction { + get { + return ResourceManager.GetString("MsgTraceRecoverAction", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to FAILED TO RECOVER. + /// + public static string MsgTraceRecoverFailed { + get { + return ResourceManager.GetString("MsgTraceRecoverFailed", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to RECOVERING: Found state with shift on error : {0}. + /// + public static string MsgTraceRecoverFoundState { + get { + return ResourceManager.GetString("MsgTraceRecoverFoundState", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to RECOVERING: popping stack, looking for state with error shift. + /// + public static string MsgTraceRecovering { + get { + return ResourceManager.GetString("MsgTraceRecovering", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to RECOVERING: Reached end of error production, reducing.. + /// + public static string MsgTraceRecoverReducing { + get { + return ResourceManager.GetString("MsgTraceRecoverReducing", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to RECOVERING: Shifting Error term, {0}. + /// + public static string MsgTraceRecoverShiftError { + get { + return ResourceManager.GetString("MsgTraceRecoverShiftError", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to RECOVERING: shifting until the end of error production.. + /// + public static string MsgTraceRecoverShiftTillEnd { + get { + return ResourceManager.GetString("MsgTraceRecoverShiftTillEnd", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to RECOVERED. 
+ /// + public static string MsgTraceRecoverSuccess { + get { + return ResourceManager.GetString("MsgTraceRecoverSuccess", resourceCulture); + } + } + } +} diff --git a/src/Irony/Resources.resx b/src/Irony/Resources.resx new file mode 100644 index 0000000..a109d51 --- /dev/null +++ b/src/Irony/Resources.resx @@ -0,0 +1,457 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + Nn + + + Yy + + + Argument list not found in the stack. Expected: ValueList, found: {0}. + + + Cannot convert value from type {0} to type {1}, type converter not defined. + + + Fatal error: + + + Construct '{0}' is not supported (yet) by language implementation. + + + Interpreter error, DataStack.Pop() operation failed - stack is empty. + + + Interpreter is busy. + + + Invalid AstMode value in call to Evaluate method. Node: {0}, mode: {1}. + + + Invalid character: '{0}'. + + + UnExprNode: no implementation for unary operator '{0}'. + + + Attempt to evaluate NULL AST node. The AST node for term '{0}' was not created during parsing. + + + Operator '{0}' is not defined for types {1} and {2}. + + + Operator '{0} not imlemented. + + + {0}: {1} + Location: ErrorMessage + + + Parsed tree is null, cannot evaluate. + + + Parse tree root is null, cannot evaluate. + + + Root AST node is null, cannot evaluate. + + + Root AST node does not implement IInterpretedAstNode interface, cannot evaluate. + + + Variable {0} is not a callable function. + + + Variable {0} not defined. + + + Invalid number of arguments. Expected {0}, found {1}. + + + ({0}:{1}) + + + Location: + + + None + + + Abort script(y/n)? + + + Exit console (y/n)? + + + Press any key to end the program. 
+ + + Ambiguous grammar, unresolvable reduce-reduce conflicts. State {0}, lookaheads [{1}] + + + Ambiguous grammar, unresolvable shift-reduce conflicts. State {0}, lookaheads [{1}] + + + {0} State {1} on inputs: {2} + + + AstNodeType or AstNodeCreator is not set on non-terminals: {0}. Either set Term.AstConfig.NodeType, or provide default values in AstContext. + + + ParserDataBuilder error: inadequate state {0}, reduce item '{1}' has no lookaheads. + + + Non-terminal {0} has uninitialized Rule property. + + + Root property of the grammar is not set. + + + Reduce-reduce conflict. State {0}, lookaheads: {1}. Selected reduce on first production in conflict set. + + + Rule for NonTerminal {0} contains null as an operand in position {1} in one of productions. + + + Shift-reduce conflict. State {0}, lookaheads [{1}]. Selected shift as preferred action. + + + NLALR transform: Add WrapTail() in '.' position to [{0}]. + + + Syntax error, expected: {0} + + + [end-of-statement] + + + (unnamed) + + + {0} Console.\r\nPress Ctrl-C to exit the program.\r\n + + + Console + + + Failed to create AST node for non-terminal [{0}], error: {1} + + + Syntax error. + + + Unexpected end of file. + + + Unexpected indentation. + + + Unmatched closing brace '{0}'. + + + Accept + + + Operator, shift to {0}/reduce on {1}. + + + Reduce on {0} + + + Shift to {0} + + + (Unknown action type) + + + (INITIAL STATE) + + + Parsing conflict resolved in code. + + + Operator - resolved to {0} + + + Popped state from stack, pushing {0} + + + FAILED TO RECOVER + + + RECOVERING: popping stack, looking for state with error shift + + + RECOVERED + + + (EOF) + + + ... + + + "[{0}], at {1} + + + Invalid length of char literal - should be a single character. + + + Mal-formed string literal - cannot find termination symbol. + + + Invalid unicode escape ({0}), expected {1} hex digits. + + + Invalid \x escape, at least one digit expected. + + + Duplicate switch '{0}' for regular expression. 
+ + + Cannot convert literal {0} to type {1}. + + + Invalid escape sequence: \{0}. + + + Invalid escape sequence. + + + Invalid escape symbol, expected 'u' or 'U' only. + + + Invalid number. + + + Invalid switch '{0}' for regular expression + + + Error in string literal [{0}]: No start/end symbols specified. + + + No end symbol for regex literal. + + + Number cannot be followed by a letter. + + + Unclosed comment block + + + (Key symbol) + + + (Keyword) + + + [line break] + + + Invalid dedent level, no previous matching indent found. + + + CodeOutlineFilter: line continuation symbol '{0}' should be added to Grammar.NonGrammarTerminals list. + + + RECOVERING: {0} + + + RECOVERING: Found state with shift on error : {0} + + + RECOVERING: Reached end of error production, reducing. + + + RECOVERING: Shifting Error term, {0} + + + RECOVERING: shifting until the end of error production. + + + Could not find a closing quote for quoted value. + + + Invalid arguments for IncDecNode AST node: either first or second argument should be '--' or '++'. + + + Invalid operation, attempt to assign to a constant or literal value. + + + Error in string literal [{0}]: IsTemplate flag is set, but TemplateSettings is not provided in AstNodeConfig property. + + + Duplicate start symbol {0} in string literal [{1}]. + + + Invalid embedded expression. + + + No ending tag '{0}' found in embedded expression. + + + Expression root non-terminal in template settings (AstNodeConfig property) in templated string literal [{0}] is not added to Roots set. Add it to SnippetRoots in grammar constructor. + + + Expression root is not specified in template settings (AstNodeConfig property) in templated string literal [{0}]. + + + ({0}) term passed as 'root' parameter to parser is not Root or snippet root of the grammar. Add it to SnippetRoots set in grammar constructor. 
+ + + ImpliedSymbolTerminal cannot be used in grammar with DisableScannerParserLink flag set + + + List non-terminals cannot be marked transient; list: ({0}) + + + Transient non-terminal must have zero or one non-punctuation child nodes; non-terminals: {0}. + + + The last term of production containing SyntaxError must be a terminal. NonTerminal: {0} + + + A terminal {0} has empty prefix. + + + No closing pair for opening symbol {0} + + + Operator '{0}' is not defined for types {1}. + + + Failed to find the ending tag '{0}' for a text literal. + + + Expected new line symbol. + + + Custom action did not execute: parser state or input did not change. + + + Executing custom action + + + Fatal error in parser: attempt to move back in the source. + + + Syntax error, unexpected input. + + \ No newline at end of file diff --git a/src/Irony/Sanchime.Irony.csproj b/src/Irony/Sanchime.Irony.csproj new file mode 100644 index 0000000..b026100 --- /dev/null +++ b/src/Irony/Sanchime.Irony.csproj @@ -0,0 +1,27 @@ + + + + netstandard2.0 + 10.0 + enable + Irony + Irony.NetCore is a .NET Core compatible version of the Irony framework initially developed and maintained by Roman Ivantsov. Irony is a development kit for implementing languages on .NET platform. In Irony the target language grammar is coded directly in c# using operator overloading to express grammar constructs. Irony's scanner and parser modules use the grammar encoded as c# class to control the parsing process. 
+ Github
+ irony;parser
+ true
+ LICENSE
+
+
+
+
+
+
+
+
+
+ True
+
+
+
+
+
diff --git a/src/Irony/Utilities/Extensions.cs b/src/Irony/Utilities/Extensions.cs
new file mode 100644
index 0000000..698f29b
--- /dev/null
+++ b/src/Irony/Utilities/Extensions.cs
@@ -0,0 +1,30 @@
+namespace Sanchime.Irony.Utilities
+{
+    /// <summary>
+    /// IsSet extension methods for the parser's flag enums. Each overload performs
+    /// a bitwise AND of its two operands and reports whether the result is non-zero,
+    /// i.e. whether the operands have at least one bit in common.
+    /// </summary>
+    public static class ParsingEnumExtensions
+    {
+        /// <summary>Tests <paramref name="flags"/> against the <paramref name="flag"/> mask.</summary>
+        /// <returns>true when the two values share at least one bit; otherwise false.</returns>
+        public static bool IsSet(this TermFlags flags, TermFlags flag) => (flags & flag) != 0;
+
+        /// <summary>Tests <paramref name="flags"/> against the <paramref name="flag"/> mask.</summary>
+        /// <returns>true when the two values share at least one bit; otherwise false.</returns>
+        public static bool IsSet(this LanguageFlags flags, LanguageFlags flag) => (flags & flag) != 0;
+
+        /// <summary>Tests <paramref name="options"/> against the <paramref name="option"/> mask.</summary>
+        /// <returns>true when the two values share at least one bit; otherwise false.</returns>
+        public static bool IsSet(this ParseOptions options, ParseOptions option) => (options & option) != 0;
+
+        /// <summary>Tests <paramref name="options"/> against the <paramref name="option"/> mask.</summary>
+        /// <returns>true when the two values share at least one bit; otherwise false.</returns>
+        public static bool IsSet(this TermListOptions options, TermListOptions option) => (options & option) != 0;
+
+        /// <summary>Tests <paramref name="flags"/> against the <paramref name="flag"/> mask.</summary>
+        /// <returns>true when the two values share at least one bit; otherwise false.</returns>
+        public static bool IsSet(this ProductionFlags flags, ProductionFlags flag) => (flags & flag) != 0;
+    }//class
+}
\ No newline at end of file
diff --git a/src/Irony/Utilities/LogMessage.cs b/src/Irony/Utilities/LogMessage.cs
new file mode 100644
index 0000000..8ffa780
--- /dev/null
+++ b/src/Irony/Utilities/LogMessage.cs
@@ -0,0 +1,53 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Utilities
+{
+    /// <summary>Severity assigned to a <see cref="LogMessage"/>.</summary>
+    public enum ErrorLevel
+    {
+        Info = 0,
+        Warning = 1,
+        Error = 2,
+    }
+
+    /// <summary>
+    /// Container for syntax errors and warnings; every field is read-only after construction.
+    /// </summary>
+    public class LogMessage
+    {
+        /// <summary>Stores the four arguments, unchanged, in the read-only fields of the same names.</summary>
+        public LogMessage(ErrorLevel level, SourceLocation location, string message, ParserState parserState)
+            => (Level, Location, Message, ParserState) = (level, location, message, parserState);
+
+        /// <summary>Severity passed to the constructor.</summary>
+        public readonly ErrorLevel Level;
+        /// <summary>Parser state passed to the constructor.</summary>
+        public readonly ParserState ParserState;
+        /// <summary>Source location passed to the constructor.</summary>
+        public readonly SourceLocation Location;
+        /// <summary>Message text passed to the constructor.</summary>
+        public readonly string Message;
+
+        /// <summary>Returns <see cref="Message"/> as-is.</summary>
+        public override string ToString() => Message;
+    }//class
+
+    /// <summary>List of <see cref="LogMessage"/> items (the element type argument was missing and is restored here).</summary>
+    public class LogMessageList : List<LogMessage>
+    {
+        /// <summary>Comparison that orders two messages by their Location fields via <c>SourceLocation.Compare</c>.</summary>
+        public static int ByLocation(LogMessage x, LogMessage y) => SourceLocation.Compare(x.Location, y.Location);
+    }
+}//namespace
\ No newline at end of file
diff --git a/src/Irony/Utilities/StringUtils.cs b/src/Irony/Utilities/StringUtils.cs
new file mode 100644
index 0000000..a947351
--- /dev/null
+++ b/src/Irony/Utilities/StringUtils.cs
@@ -0,0 +1,131 @@
+#region License
+
+/* **********************************************************************************
+ * Copyright (c) Roman Ivantsov
+ * This source code is subject to terms and conditions of the MIT License
+ * for Irony. A copy of the license can be found in the License.txt file
+ * at the root of this distribution.
+ * By using this source code in any fashion, you are agreeing to be bound by the terms of the
+ * MIT License.
+ * You must not remove this notice from this software.
+ * **********************************************************************************/
+
+#endregion
+
+namespace Sanchime.Irony.Utilities
+{
+    /// <summary>Character-class constants plus a string-joining helper.</summary>
+    public static class Strings
+    {
+        public const string AllLatinLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+        public const string DecimalDigits = "1234567890";
+        public const string OctalDigits = "12345670";
+        public const string HexDigits = "1234567890aAbBcCdDeEfF";
+        public const string BinaryDigits = "01";
+
+        /// <summary>Joins <paramref name="values"/> with <paramref name="separator"/>; thin wrapper over <c>string.Join</c>.</summary>
+        public static string JoinStrings(string separator, IEnumerable<string> values)
+            => string.Join(separator, values);
+    }//class
+
+    /// <summary>Dictionary of strings. NOTE(review): type arguments restored as string/string from the class name - confirm.</summary>
+    public class StringDictionary : Dictionary<string, string>
+    { }
+
+    /// <summary>List of characters. NOTE(review): element type restored as char from the class name - confirm.</summary>
+    public class CharList : List<char>
+    { }
+
+    // CharHashSet: adding Hash to the name to avoid confusion with System.Runtime.Interoperability.CharSet
+    /// <summary>Character set that, when created case-insensitive, stores both case variants of every added character.</summary>
+    public class CharHashSet : HashSet<char>
+    {
+        private readonly bool _caseSensitive;
+
+        /// <summary>Creates an empty set; pass false to make <c>Add</c> store both case variants.</summary>
+        public CharHashSet(bool caseSensitive = true)
+        {
+            _caseSensitive = caseSensitive;
+        }
+
+        /// <summary>Adds <paramref name="ch"/>, or its lower- and upper-case invariant forms when the set is case-insensitive.</summary>
+        /// <remarks>Declared with <c>new</c>: it hides the base Add, so calls through a base-typed reference skip the case handling.</remarks>
+        public new void Add(char ch)
+        {
+            if (_caseSensitive)
+            {
+                base.Add(ch);
+            }
+            else
+            {
+                base.Add(char.ToLowerInvariant(ch));
+                base.Add(char.ToUpperInvariant(ch));
+            }
+        }
+    }
+
+    /// <summary>List of <see cref="Type"/> objects.</summary>
+    public class TypeList : List<Type>
+    {
+        public TypeList()
+        { }
+
+        public TypeList(params Type[] types) : base(types)
+        { }
+    }
+
+    /// <summary>Set of strings with space-separated (or custom-separated) string output.</summary>
+    public class StringSet : HashSet<string>
+    {
+        public StringSet()
+        { }
+
+        public StringSet(StringComparer comparer) : base(comparer)
+        { }
+
+        /// <summary>Returns the items joined with a single space.</summary>
+        public override string ToString() => ToString(" ");
+
+        /// <summary>Adds all <paramref name="items"/> through <c>UnionWith</c>.</summary>
+        public void AddRange(params string[] items) => UnionWith(items);
+
+        /// <summary>Returns the items joined with <paramref name="separator"/>.</summary>
+        public string ToString(string separator) => Strings.JoinStrings(separator, this);
+    }
+
+    /// <summary>List of strings with space-separated (or custom-separated) string output.</summary>
+    public class StringList : List<string>
+    {
+        public StringList()
+        { }
+
+        public StringList(params string[] args)
+        {
+            AddRange(args);
+        }
+
+        /// <summary>Returns the items joined with a single space.</summary>
+        public override string ToString() => ToString(" ");
+
+        /// <summary>Returns the items joined with <paramref name="separator"/>.</summary>
+        public string ToString(string separator) => Strings.JoinStrings(separator, this);
+
+        /// <summary>
+        /// Comparison used in sorting suffixes and prefixes; longer strings come first.
+        /// Equal-length strings are ordered ordinally and null sorts after every non-null string,
+        /// so swapping the arguments always negates the result.
+        /// </summary>
+        /// <returns>-1 when x sorts first, 1 when y sorts first, 0 when they are equal.</returns>
+        public static int LongerFirst(string x, string y)
+        {
+            // Null counts as length -1: it sorts after "" and never throws.
+            int xLength = x?.Length ?? -1;
+            int yLength = y?.Length ?? -1;
+            if (xLength != yLength)
+            {
+                return xLength > yLength ? -1 : 1;
+            }
+            return Math.Sign(string.CompareOrdinal(x, y));
+        }
+    }//class
+}
\ No newline at end of file