Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

RmUtilTokenizer.h

00001 //=================================================================================================//
00002 // filename: RmUtilTokenizer.h                                                                     //
00003 //                                                                                                 //
00004 //           ATI Research, Inc.                                                                    //
00005 //           3D Application Research Group                                                         //
00006 //                                                                                                 //
00007 // Description: declaration file for Tokenizer Utility.                                            //
00008 //                                                                                                 //
00009 //=================================================================================================//
00010 //   (C) 2004 ATI Research, Inc.  All rights reserved.                                             //
00011 //=================================================================================================//
00012 
00013 #ifndef _RM_UTILITIES_TOKENIZER_H_
00014 #define _RM_UTILITIES_TOKENIZER_H_
00015 
00016 //...........................................................................
00017 //...........................................................................
00018 //...........................................................................
00019 //  RmUtilTokenizer
00020 //...........................................................................
00021 //...........................................................................
00022 //...........................................................................
00023 class RMUTIL_API RmUtilTokenizer
00024 {
00025 public : // Sub Classes / Types
00026    enum CharType   // Character class: element type of m_charTypes, result type of GetCharType()
00027    {
00028       CHARTYPE_NUMBER,      // presumably digits -- confirm in SetDefaultCharTypes()
00029       CHARTYPE_CHAR,        // presumably characters that form keywords/identifiers -- confirm
00030       CHARTYPE_OPERATOR,    // presumably punctuation that forms operators -- confirm
00031       CHARTYPE_WHITESPACE   // presumably skipped by SkipWhitespace() -- confirm
00032    }; // End of CharType
00033 
00034    enum TokenType   // Kind of a Token; reported by Token::GetTokenType()
00035    {
00036       TOKEN_UNKNOWN,
00037       TOKEN_KEYWORD,      // presumably text registered through AddKeyword() -- confirm in ProcessText()
00038       TOKEN_OPERATOR,     // presumably text registered through AddOperator() -- confirm in ProcessOperator()
00039       TOKEN_IDENTIFIER,   // presumably text that is not a registered keyword -- confirm in ProcessText()
00040       TOKEN_INTEGER,      // value presumably read through Token::GetIntegerNumber() -- confirm
00041       TOKEN_FLOAT,        // value presumably read through Token::GetFloatNumber() -- confirm
00042       TOKEN_INVALID,
00043       TOKEN_EOF           // presumably produced once the input buffer is exhausted -- confirm in ParseNext()
00044    }; // End of TokenType
00045 
00046    //----------------------------------------------------------------------
00047    // Keyword
00048    //----------------------------------------------------------------------
00049    class RMUTIL_API Keyword
00050    {
00051    public:
00052       // Pairs the text szWord with the caller-chosen identifier id.
00053       Keyword( const RM_TCHAR *szWord, int id )
00054          : m_word( szWord ), m_id( id )
00055       {
00056       } // End of Constructor
00057 
00058       const RmStringT& GetWord() const { return m_word; }   // text given at construction
00059       int              GetID() const   { return m_id; }     // identifier given at construction
00060 
00061    private:
00062       RmStringT m_word;   // built from szWord
00063       int       m_id;     // value of id
00064    }; // End of Keyword
00065 
00066    //----------------------------------------------------------------------
00067    // Comment Block
00068    //----------------------------------------------------------------------
00069    class RMUTIL_API CommentBlock
00070    {
00071    public:
00072       // Stores the opening (szStart) and closing (szEnd) delimiter of one comment style.
00073       CommentBlock( const RM_TCHAR *szStart, const RM_TCHAR *szEnd )
00074          : m_startOfComment( szStart ), m_endOfComment( szEnd )
00075       {
00076       } // End of Constructor
00077 
00078       ~CommentBlock() {}
00079 
00080       const RmStringT& GetStartOfComment() const { return m_startOfComment; }
00081       const RmStringT& GetEndOfComment() const   { return m_endOfComment; }
00082 
00083    private:
00084       RmStringT  m_startOfComment;   // built from szStart
00085       RmStringT  m_endOfComment;     // built from szEnd
00086    }; // End of CommentBlock
00087 
00088    //----------------------------------------------------------------------
00089    // Token
00090    //----------------------------------------------------------------------
00091    class RMUTIL_API Token
00092    {
00093       friend class RmUtilTokenizer;   // class-key added (required before C++11); Token has no setters, so only the friend can write its fields
00094 
00095    public :
00096       Token() : m_tokenType(TOKEN_UNKNOWN), m_id(0), m_floatNumber(0.0), m_integerNumber(0) {};   // give every scalar a defined value so the getters never read indeterminate data
00097       ~Token() {};
00098       // Read-only accessors; each returns the corresponding private field unchanged.
00099       TokenType GetTokenType() const { return m_tokenType; };
00100 
00101       const RmStringT& GetText() const { return m_text; };
00102       int   GetID() const { return m_id; };
00103 
00104       double GetFloatNumber() const { return m_floatNumber; };
00105       int    GetIntegerNumber() const { return m_integerNumber; };
00106 
00107    private :
00108       TokenType m_tokenType;   // TOKEN_UNKNOWN after default construction
00109 
00110       RmStringT m_text;        // default-constructed RmStringT until the tokenizer assigns it
00111 
00112       int       m_id;   // Keyword or Operator (0 after default construction)
00113 
00114       double    m_floatNumber;     // presumably meaningful only for TOKEN_FLOAT -- confirm in ProcessNumber()
00115       int       m_integerNumber;   // presumably meaningful only for TOKEN_INTEGER -- confirm in ProcessNumber()
00116    }; // End of Token
00117 
00118 
00119    //----------------------------------------------------------------------
00120    // ParseParam
00121    //----------------------------------------------------------------------
00122    class RMUTIL_API ParseParam
00123    {
00124    public :
00125       const RM_TCHAR *m_pBuffer;   // text being scanned; ParseParam never frees it
00126       int             m_size;      // end bound for m_curPos (an element count, since m_curPos indexes m_pBuffer)
00127       int             m_curPos;    // index of the character GetChar() returns next
00128       // NOTE: no constructor is declared, so all three fields are indeterminate until assigned.
00129       //-------------------------------------------------------------------
00130       // Returns the current char; advances the position unless bAdvance is false.
00131       // At end of input: asserts in debug builds, returns 0 when asserts are compiled out.
00132       RM_TCHAR GetChar( bool bAdvance = true )
00133       {
00134          assert(IsEndReached()==false);
00135          if (IsEndReached())
00136             return RM_TCHAR(0);   // NDEBUG build: never index past the end of the buffer
00137          RM_TCHAR ch = m_pBuffer[m_curPos];
00138          if (bAdvance) m_curPos++;
00139          return ch;
00140       }; // End of GetChar
00141 
00142       //-------------------------------------------------------------------
00143       bool IsEndReached() const 
00144       {
00145          if (m_curPos>=m_size)
00146             return true;
00147 
00148          return false;
00149       }; // End of IsEndReached
00150    }; // End of ParseParam
00151 
00152 public :
00153    RmUtilTokenizer();
00154    virtual ~RmUtilTokenizer();
00155 
00156    //------------------------------------------------------------------
00157    // Init/Uninit (virtual, so subclasses may override; bodies are not in this header)
00158    //------------------------------------------------------------------
00159    virtual void Initialize();
00160    virtual void Uninitialize();
00161 
00162    //------------------------------------------------------------------
00163    // Parsing: presumably BeginParsing(), then ParseNext()/IsEndReached(), then EndParsing() -- confirm in the .cpp
00164    //------------------------------------------------------------------
00165    void BeginParsing( const RM_TCHAR *pBuffer, int sizeOfBuffer );   // sizeOfBuffer presumably counts RM_TCHAR elements, not bytes -- TODO confirm
00166 
00167       void ParseNext();              // presumably updates the token returned by GetCurrentToken() -- confirm
00168       bool IsEndReached() const;
00169 
00170    void EndParsing();
00171 
00172    //------------------------------------------------------------------
00173    // Keyword / operator registration
00174    //------------------------------------------------------------------
00175    void AddKeyword( const RM_TCHAR *szKeyword, int id );     // id: presumably what Token::GetID() reports for this keyword -- confirm
00176    void AddOperator( const RM_TCHAR *szOperator, int id );   // id: presumably what Token::GetID() reports for this operator -- confirm
00177 
00178    //------------------------------------------------------------------
00179    // Comment Blocks: registers one start/end delimiter pair
00180    //------------------------------------------------------------------
00181    void AddComment( const RM_TCHAR *szStartOfComment, 
00182                     const RM_TCHAR *szEndOfComment );
00183 
00184    //------------------------------------------------------------------
00185    // Current Token: returns the address of the member m_currentToken (never null)
00186    //------------------------------------------------------------------
00187    const Token* GetCurrentToken() const { return &m_currentToken; };
00188 
00189 protected :
00190    virtual void SetDefaultCharTypes();   // presumably fills m_charTypes; overridable by subclasses -- body not shown
00191 
00192 private :
00193    //------------------------------------------------------------------
00194    // Character Types
00195    //------------------------------------------------------------------
00196    CharType  m_charTypes[65536];   // 65536 entries, indexed directly by character value in GetCharType()
00197 
00198    //------------------------------------------------------------------
00199    // Hash Map for Keywords
00200    //
00201    // - Use first character as index to list ( hash key )
00202    //------------------------------------------------------------------
00203    RmLinkedList<Keyword*> m_keywordMap[256];    // 256 lists of Keyword pointers
00204    RmLinkedList<Keyword*> m_operatorMap[256];   // same layout; operators are stored as Keyword objects too
00205 
00206    int GetHashKey( const RM_TCHAR *szWord );   // presumably returns a list index in [0,255] -- confirm in the .cpp
00207    void RemoveAllKeywords();    // NOTE(review): the lists hold raw pointers; presumably deletes them -- confirm
00208    void RemoveAllOperators();   // NOTE(review): as above -- confirm
00209 
00210    const Keyword* FindKeyword( const RM_TCHAR *szWord );    // presumably null when szWord is not registered -- confirm
00211    const Keyword* FindOperator( const RM_TCHAR *szWord );   // presumably null when szWord is not registered -- confirm
00212 
00213    //------------------------------------------------------------------
00214    // Comment: list of registered start/end delimiter pairs
00215    //------------------------------------------------------------------
00216    RmLinkedList<CommentBlock*> m_commentBlocks;
00217 
00218    void RemoveAllCommentBlocks();   // NOTE(review): the list holds raw pointers; presumably deletes them -- confirm
00219 
00220    //------------------------------------------------------------------
00221    // Current Token: the object whose address GetCurrentToken() returns
00222    //------------------------------------------------------------------
00223    Token      m_currentToken;
00224 
00225    //------------------------------------------------------------------
00226    // Parsing: buffer pointer, size and read position (see ParseParam)
00227    //------------------------------------------------------------------
00228    ParseParam m_parseParam;
00229 
00230    CharType GetCharType( RM_TCHAR ch ) const { return m_charTypes[ static_cast<unsigned int>(ch) & ((sizeof(RM_TCHAR) == 1) ? 0xFFu : 0xFFFFu) ]; };   // Looks up the class of ch. The index is masked into the table so a signed narrow RM_TCHAR (negative value) or a >16-bit RM_TCHAR cannot read outside m_charTypes; values already in range are unchanged.
00231 
00232    void SkipWhitespace( ParseParam &parseParam );   // presumably advances past CHARTYPE_WHITESPACE characters -- confirm
00233 
00234    // Return true if comment was processed
00235    bool ProcessComment( ParseParam &parseParam );
00236 
00237    // Find starting comment block at current position (presumably null when none matches -- confirm)
00238    CommentBlock* GetCommentBlock( ParseParam &parseParam );
00239 
00240    // Get to end of comment described by pBlock
00241    void ProcessToEndOfComment( ParseParam &parseParam, CommentBlock *pBlock );
00242 
00243    // Process Number (presumably yields TOKEN_INTEGER or TOKEN_FLOAT -- confirm)
00244    void ProcessNumber( ParseParam &parseParam );
00245 
00246    // Process Text ( Keyword or Identifier )
00247    void ProcessText( ParseParam &parseParam );
00248 
00249    // Process Operator
00250    void ProcessOperator( ParseParam &parseParam );
00251 }; // End of RmUtilTokenizer
00252 
00253 #endif

Generated on Fri Feb 25 16:08:42 2005 for RenderMonkey SDK by doxygen 1.3.6