ImageMagick-mirror/MagickCore/token.c

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%                    TTTTT   OOO   K   K  EEEEE  N   N                        %
%                      T    O   O  K  K   E      NN  N                        %
%                      T    O   O  KKK    EEE    N N N                        %
%                      T    O   O  K  K   E      N  NN                        %
%                      T     OOO   K   K  EEEEE  N   N                        %
%                                                                             %
%                                                                             %
%                         MagickCore Token Methods                            %
%                                                                             %
%                             Software Design                                 %
%                                  Cristy                                     %
%                              January 1993                                   %
%                                                                             %
%                                                                             %
%  Copyright @ 1999 ImageMagick Studio LLC, a non-profit organization         %
%  dedicated to making software imaging solutions freely available.           %
%                                                                             %
%  You may not use this file except in compliance with the License.  You may  %
%  obtain a copy of the License at                                            %
%                                                                             %
%    https://imagemagick.org/license/                                         %
%                                                                             %
%  Unless required by applicable law or agreed to in writing, software        %
%  distributed under the License is distributed on an "AS IS" BASIS,          %
%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
%  See the License for the specific language governing permissions and        %
%  limitations under the License.                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%
%
*/

/*
  Include declarations.
*/
#include "MagickCore/studio.h"
#include "MagickCore/exception.h"
#include "MagickCore/exception-private.h"
#include "MagickCore/image.h"
#include "MagickCore/image-private.h"
#include "MagickCore/locale-private.h"
#include "MagickCore/memory_.h"
#include "MagickCore/memory-private.h"
#include "MagickCore/string_.h"
#include "MagickCore/string-private.h"
#include "MagickCore/token.h"
#include "MagickCore/token-private.h"
#include "MagickCore/utility.h"
#include "MagickCore/utility-private.h"

/*
  Typedef declarations.
*/
/*
  Scratch state used by Tokenizer() and StoreToken() while one token is
  parsed.  Tokenizer() resets state, quote, flag, and offset on every call.
*/
struct _TokenInfo
{
  /* Current parser state: IN_WHITE, IN_TOKEN, IN_QUOTE, or IN_OZONE. */
  int
    state;

  /* Copy of the caller's flag; StoreToken() reads the low two bits. */
  MagickStatusType
    flag;

  /* Index in the token buffer where the next character is stored. */
  ssize_t
    offset;

  /* Quote character that opened the current quoted token, if any. */
  char
    quote;

  /*
    Set to MagickCoreSignature by AcquireTokenInfo() and checked, then
    inverted, by DestroyTokenInfo().
  */
  size_t
    signature;
};

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   A c q u i r e T o k e n I n f o                                           %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  AcquireTokenInfo() allocates the TokenInfo structure.
%
%  The format of the AcquireTokenInfo method is:
%
%      TokenInfo *AcquireTokenInfo()
%
*/
MagickExport TokenInfo *AcquireTokenInfo(void)
{
  /*
    Allocate the structure and stamp its signature; the remaining members
    are assigned by Tokenizer() before they are read.
  */
  TokenInfo
    *info = (TokenInfo *) AcquireCriticalMemory(sizeof(TokenInfo));

  info->signature=MagickCoreSignature;
  return(info);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   D e s t r o y T o k e n I n f o                                           %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
%
%  The format of the DestroyTokenInfo method is:
%
%      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
%
%  A description of each parameter follows:
%
%    o token_info: Specifies a pointer to a TokenInfo structure.
%
*/
MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
{
  /*
    Validate the handle before touching it.
  */
  assert(token_info != (TokenInfo *) NULL);
  assert(token_info->signature == MagickCoreSignature);
  if (IsEventLogging() != MagickFalse)
    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
  /*
    Invert the signature so a stale pointer fails the signature assertion,
    then release the memory and hand back whatever the allocator returns.
  */
  token_info->signature=(~MagickCoreSignature);
  return((TokenInfo *) RelinquishMagickMemory(token_info));
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
+   G e t N e x t T o k e n                                                   %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  GetNextToken() gets a token from the token stream.  A token is defined as
%  a sequence of characters delimited by whitespace (e.g. clip-path), a
%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
%  parenthesis (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
%  separator characters: ':', '=', ',', and ';'.  GetNextToken() returns the
%  number of characters consumed from start, including any surrounding
%  whitespace, plus one.
%
%  The format of the GetNextToken method is:
%
%      size_t GetNextToken(const char *magick_restrict start,
%        const char **magick_restrict end,const size_t extent,
%        char *magick_restrict token)
%
%  A description of each parameter follows:
%
%    o start: the start of the token sequence.
%
%    o end: point to the end of the token sequence.
%
%    o extent: maximum extent of the token.
%
%    o token: copy the token to this buffer.
%
*/
MagickExport magick_hot_spot size_t GetNextToken(
  const char *magick_restrict start,const char **magick_restrict end,
  const size_t extent,char *magick_restrict token)
{
  char
    *magick_restrict q;

  const char
    *magick_restrict p;

  double
    value;

  ssize_t
    i;

  assert(start != (const char *) NULL);
  assert(token != (char *) NULL);
  /*
    i is the write index into token; p is the read position in the stream.
    Skip leading whitespace before classifying the token by its first
    character.
  */
  i=0;
  p=start;
  while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
    p++;
  switch (*p)
  {
    case '\0':
      break;
    case '"':
    case '\'':
    case '`':
    case '{':
    {
      /*
        Quoted or braced token: copy up to the terminator, which is consumed
        but not stored.  Note a backtick-opened token is closed by a single
        quote, and '{' is closed by '}'.
      */
      char
        escape;

      switch (*p)
      {
        case '"': escape='"'; break;
        case '\'': escape='\''; break;
        case '`': escape='\''; break;
        case '{': escape='}'; break;
        default: escape=(*p); break;
      }
      for (p++; *p != '\0'; p++)
      {
        /*
          A backslash followed by the terminator or by another backslash is
          dropped and the following character is stored literally.
        */
        if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
          p++;
        else
          if (*p == escape)
            {
              p++;
              break;
            }
        /*
          Store only while room remains for the terminating NUL; stop
          scanning once the read position is extent-1 or more past start.
        */
        if (i < (ssize_t) (extent-1))
          token[i++]=(*p);
        if ((size_t) (p-start) >= (extent-1))
          break;
      }
      break;
    }
    case '/':
    {
      /*
        A '/' token, extended to two characters when followed by '>' or '/'.
      */
      if (i < (ssize_t) (extent-1))
        token[i++]=(*p);
      p++;
      if ((*p == '>') || (*p == '/'))
        {
          if (i < (ssize_t) (extent-1))
            token[i++]=(*p);
          p++;
        }
      break;
    }
    default:
    {
      /*
        StringToDouble() sets q; when q differs from p (presumably a numeric
        prefix was parsed -- confirm against StringToDouble()) the span
        [p,q) is copied as the token, stopping early at a comma, and a
        directly following '%' is appended.  The parsed value is not used.
      */
      value=StringToDouble(p,&q);
      (void) value;
      if ((p != q) && (*p != ','))
        {
          for ( ; (p < q) && (*p != ','); p++)
          {
            if (i < (ssize_t) (extent-1))
              token[i++]=(*p);
            if ((size_t) (p-start) >= (extent-1))
              break;
          }
          if (*p == '%')
            {
              if (i < (ssize_t) (extent-1))
                token[i++]=(*p);
              p++;
            }
          break;
        }
      /*
        Any other character that is not a letter, the directory separator,
        '#', or '<' is returned as a one-character token.
      */
      if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
          (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
        {
          if (i < (ssize_t) (extent-1))
            token[i++]=(*p);
          p++;
          break;
        }
      /*
        Word token: accumulate until whitespace or one of '=' ',' ':' ';'
        that is not preceded by a backslash.
      */
      for ( ; *p != '\0'; p++)
      {
        if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
            (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
          break;
        /*
          '<' ends the token unless it is its first character; '>' is stored
          and then ends the token (p is left on the '>').
        */
        if ((i > 0) && (*p == '<'))
          break;
        if (i < (ssize_t) (extent-1))
          token[i++]=(*p);
        if (*p == '>')
          break;
        if (*p == '(')
          {
            /*
              Copy a parenthesized group verbatim, separators included, up
              to and including the first ')' not preceded by a backslash.
            */
            for (p++; *p != '\0'; p++)
            {
              if (i < (ssize_t) (extent-1))
                token[i++]=(*p);
              if ((*p == ')') && (*(p-1) != '\\'))
                break;
              if ((size_t) (p-start) >= (extent-1))
                break;
            }
            /*
              Unterminated group: leave p on the NUL rather than letting the
              outer loop step past it.
            */
            if (*p == '\0')
              break;
          }
        if ((size_t) (p-start) >= (extent-1))
          break;
      }
      break;
    }
  }
  token[i]='\0';
  /*
    Reduce a token beginning with "url(#" to its inner name: cut at the last
    ')' and shift the remainder left by five characters.  The byte count
    q-token-4 covers the name plus its terminating NUL.
  */
  if (LocaleNCompare(token,"url(#",5) == 0)
    {
      q=strrchr(token,')');
      if (q != (char *) NULL)
        {
          *q='\0';
          (void) memmove(token,token+5,(size_t) (q-token-4));
        }
    }
  /*
    Skip trailing whitespace so *end addresses the start of the next token.
  */
  while (isspace((int) ((unsigned char) *p)) != 0)
    p++;
  if (end != (const char **) NULL)
    *end=(const char *) p;
  /*
    The result is the number of characters consumed from start, plus one.
  */
  return((size_t) (p-start+1));
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   G l o b E x p r e s s i o n                                               %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  GlobExpression() returns MagickTrue if the expression matches the pattern.
%
%  The format of the GlobExpression function is:
%
%      MagickBooleanType GlobExpression(const char *magick_restrict expression,
%        const char *magick_restrict pattern,
%        const MagickBooleanType case_insensitive)
%
%  A description of each parameter follows:
%
%    o expression: Specifies a pointer to a text string containing a file name.
%
%    o pattern: Specifies a pointer to a text string containing a pattern.
%
%    o case_insensitive: set to MagickTrue to ignore the case when matching
%      an expression.
%
*/

/*
  GlobExpression_() is the recursive worker behind GlobExpression().  It
  returns MagickTrue when expression matches pattern.

  Recognized pattern syntax:

    o '*' matches any run of characters, including none.
    o '?' matches exactly one character.
    o '[...]' matches one character against a set; "a-z" ranges and
      backslash-escaped members are accepted.  Set comparison does not honor
      case_insensitive.
    o '{a,b,...}' tries each comma-separated alternative followed by the
      rest of the pattern.
    o '\' (not compiled in for non-Cygwin Windows builds) makes the next
      pattern character match literally.

  A NULL or empty pattern, or a lone '*', matches any expression.  A pattern
  with none of '{', '*', '?' is rejected when GetPathComponent() reports a
  non-empty SubimagePath component for it.

  depth counts recursion levels; beyond MagickMaxRecursionDepth the function
  sets errno to EOVERFLOW and returns MagickFalse.
*/
static MagickBooleanType GlobExpression_(const char *magick_restrict expression,
  const char *magick_restrict pattern,const MagickBooleanType case_insensitive,
  const size_t depth)
{
  if (depth > MagickMaxRecursionDepth)
    {
      errno=EOVERFLOW;
      return(MagickFalse);
    }
  /*
    Empty pattern or single '*' always matches.
  */
  if (pattern == (const char *) NULL)
    return(MagickTrue);
  if (GetUTFCode(pattern) == 0)
    return(MagickTrue);
  if ((GetUTFCode(pattern) == '*') &&
      (GetUTFCode(pattern+GetUTFOctets(pattern)) == 0))
    return(MagickTrue);
  if ((strchr(pattern,'{') == NULL) &&
      (strchr(pattern,'*') == NULL) &&
      (strchr(pattern,'?') == NULL))
    {
      char
        path[MagickPathExtent]= { 0 };

      /*
        If no glob characters exist, ensure no subimage specifier.
      */
      GetPathComponent(pattern,SubimagePath,path);
      if (*path != '\0')
        return(MagickFalse);
    }
  while (GetUTFCode(pattern) != 0)
  {
    int
      ecode = GetUTFCode(expression),
      pcode = GetUTFCode(pattern);

    /*
      Expression exhausted: only '*' and '{' can still match nothing, so
      anything else ends the scan and is judged by the final test below.
    */
    if ((ecode == 0) && (pcode != '*') && (pcode != '{'))
      break;
    switch (pcode)
    {
      case '*':
      {
        do
        {
          /*
            Skip consecutive '*'.
          */
          pattern+=GetUTFOctets(pattern);
        }
        while (GetUTFCode(pattern) == '*');
        while (1)
        {
          /*
            Try to match at each position.
          */
          if (GlobExpression_(expression,pattern,case_insensitive,depth+1) != MagickFalse)
            {
              /*
                Consume rest of expression and pattern.
              */
              while (GetUTFCode(expression) != 0)
                expression+=GetUTFOctets(expression);
              while (GetUTFCode(pattern) != 0)
                pattern+=GetUTFOctets(pattern);
              return(MagickTrue);
            }
          if (GetUTFCode(expression) == 0)
            break;
          expression+=GetUTFOctets(expression);
        }
        return(MagickFalse);
      }
      case '?':
      {
        if (ecode == 0)
          return(MagickFalse);
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
      case '[':
      {
        /*
          p walks the set members; q is advanced to the closing ']'.
        */
        const char
          *p = pattern+GetUTFOctets(pattern),
          *q = pattern+GetUTFOctets(pattern);

        MagickBooleanType
          matched = MagickFalse;

        if (ecode == 0)
          return(MagickFalse);
        while ((GetUTFCode(q) != 0) && (GetUTFCode(q) != ']'))
          q+=GetUTFOctets(q);
        if (GetUTFCode(q) == 0)
          return(MagickFalse);  /* malformed */
        while (p < q)
        {
          const char
            *next;

          int
            code = GetUTFCode(p);

          size_t
            octets = GetUTFOctets(p);

          if (code == '\\')
            {
              /*
                Escaped member: use the character after the backslash.
              */
              p+=octets;
              code=GetUTFCode(p);
              octets=GetUTFOctets(p);
            }
          next=p+octets;
          if ((next < q) && (GetUTFCode(next) == '-'))
            {
              int
                ncode;

              /*
                Range "code-ncode"; the upper bound may itself be escaped.
              */
              next+=GetUTFOctets(next);
              ncode=GetUTFCode(next);
              if (ncode == '\\')
                {
                  next+=GetUTFOctets(next);
                  ncode=GetUTFCode(next);
                }
              if ((ecode >= code) && (ecode <= ncode))
                matched=MagickTrue;
              p=next+GetUTFOctets(next);
            }
          else
            {
              if (ecode == code)
                matched=MagickTrue;
              p+=octets;
            }
        }
        /*
          No member matched the current expression character.
        */
        if (matched == MagickFalse)
          return(MagickFalse);
        pattern=q+GetUTFOctets(q);  /* skip ']' */
        expression+=GetUTFOctets(expression);
        break;
      }
      case '{':
      {
        char
          *a,
          *alternative;

        const char
          *p,
          *q;

        size_t
          remaining = MagickPathExtent;

        pattern+=GetUTFOctets(pattern);  /* Skip '{' */
        if (GetUTFCode(pattern) == 0)
          return(MagickFalse);
        /*
          Locate the closing '}'; q then addresses the pattern that follows
          the brace expression.
        */
        p=pattern;
        while ((GetUTFCode(p) != 0) && (GetUTFCode(p) != '}'))
        {
#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
          if (GetUTFCode(p) == '\\')
            {
              p+=GetUTFOctets(p);
              if (GetUTFCode(p) == 0)
                break;
            }
#endif
          p+=GetUTFOctets(p);
        }
        if (GetUTFCode(p) != '}')
          return(MagickFalse);  /* malformed */
        q=p+GetUTFOctets(p);
        /*
          alternative is a scratch buffer; a is its write cursor and
          remaining caps each alternative below MagickPathExtent bytes.
        */
        alternative=AcquireString(pattern);
        a=alternative;
        while (1)
        {
          int
            code = GetUTFCode(pattern);

          size_t
            octets;

          if ((code == 0) || (code == ',') || (code == '}'))
            {
              char
                *subpattern;

              MagickBooleanType
                match;

              /*
                Try alternative as a full sub-pattern.
              */
              *a='\0';
              subpattern=AcquireString(alternative);
              if (ConcatenateString(&subpattern,q) == MagickFalse)
                {
                  subpattern=DestroyString(subpattern);
                  alternative=DestroyString(alternative);
                  return(MagickFalse);
                }
              match=GlobExpression_(expression,subpattern,case_insensitive,
                depth+1);
              subpattern=DestroyString(subpattern);
              if (match != MagickFalse)
                {
                  /*
                    Consume rest of expression and pattern.
                  */
                  while (GetUTFCode(expression) != 0)
                    expression+=GetUTFOctets(expression);
                  pattern=q;
                  while (GetUTFCode(pattern) != 0)
                    pattern+=GetUTFOctets(pattern);
                  alternative=DestroyString(alternative);
                  return(MagickTrue);
                }
              /*
                Reset buffer for next alternative.
              */
              a=alternative;
              remaining=MagickPathExtent;
              if (code == ',')
                {
                  pattern+=GetUTFOctets(pattern);  /* skip ',' */
                  continue;
                }
              break;  /* '}' or end */
            }
          /*
            Copy UTF-8 sequence into alternative.
          */
          octets=GetUTFOctets(pattern);
          if ((octets == 0) || (octets >= remaining))
            break;
          (void) memcpy(a,pattern,octets);
          a+=octets;
          remaining-=octets;
          pattern+=octets;
        }
        /*
          Every alternative failed, or one was too long to buffer.
        */
        alternative=DestroyString(alternative);
        return(MagickFalse);
      }
#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
      case '\\':
      {
        pattern+=GetUTFOctets(pattern);
        if (GetUTFCode(pattern) == 0)
          return(MagickFalse);
        /*
          The literal comparison below reads pcode, so refresh it to the
          escaped character; otherwise the backslash itself is compared and
          an escaped character such as "\*" never matches a literal '*'.
        */
        pcode=GetUTFCode(pattern);
        magick_fallthrough;
      }
#endif
      default:
      {
        int
          ec = ecode,
          pc = pcode;

        if (ecode == 0)
          return(MagickFalse);
        if (case_insensitive != MagickFalse)
          {
            pc=LocaleToLowercase(pc);
            ec=LocaleToLowercase(ec);
          }
        if (pc != ec)
          return(MagickFalse);
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
    }
  }
  /*
    Trailing '*' may match nothing; both strings must now be exhausted.
  */
  while (GetUTFCode(pattern) == '*')
    pattern+=GetUTFOctets(pattern);
  return(((GetUTFCode(expression) == 0) &&
          (GetUTFCode(pattern) == 0)) ? MagickTrue : MagickFalse);
}

MagickExport MagickBooleanType GlobExpression(
  const char *magick_restrict expression,const char *magick_restrict pattern,
  const MagickBooleanType case_insensitive)
{
  MagickBooleanType
    matched;

  /*
    Start the recursive matcher at depth zero.
  */
  matched=GlobExpression_(expression,pattern,case_insensitive,0);
  return(matched);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
+     I s G l o b                                                             %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  IsGlob() returns MagickTrue if the path specification contains a globbing
%  pattern.
%
%  The format of the IsGlob method is:
%
%      MagickBooleanType IsGlob(const char *path)
%
%  A description of each parameter follows:
%
%    o path: the path.
%
*/
MagickPrivate MagickBooleanType IsGlob(const char *path)
{
  /*
    A path for which IsPathAccessible() succeeds is never reported as a
    glob, whatever characters it contains.
  */
  if (IsPathAccessible(path) != MagickFalse)
    return(MagickFalse);
  /*
    Otherwise it is a glob when it contains any of: * ? { } [ ]
  */
  if (strpbrk(path,"*?{}[]") != (char *) NULL)
    return(MagickTrue);
  return(MagickFalse);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   T o k e n i z e r                                                         %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
%  one at a time from a string of characters.  The characters used for white
%  space, for break characters, and for quotes can be specified.  Also,
%  characters in the string can be preceded by a specifiable escape character
%  which removes any special meaning the character may have.
%
%  Here is some terminology:
%
%    o token: A single unit of information in the form of a group of
%      characters.
%
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs. In addition, white space terminates a
%      non-quoted token.
%
%    o break set: One or more characters that separates non-quoted tokens.
%      Commas are a common break character. The usage of break characters to
%      signal the end of a token is the same as that of white space, except
%      multiple break characters with nothing or only white space between
%      generate a null token for each two break characters together.
%
%      For example, if blank is set to be the white space and comma is set to
%      be the break character, the line
%
%        A, B, C ,  , DEF
%
%        ... consists of 5 tokens:
%
%        1)  "A"
%        2)  "B"
%        3)  "C"
%        4)  "" (the null string)
%        5)  "DEF"
%
%    o Quote character: A character that, when surrounding a group of other
%      characters, causes the group of characters to be treated as a single
%      token, no matter how many white spaces or break characters exist in
%      the group. Also, a token always terminates after the closing quote.
%      For example, if ' is the quote character, blank is white space, and
%      comma is the break character, the following string
%
%        A, ' B, CD'EF GHI
%
%        ... consists of 4 tokens:
%
%        1)  "A"
%        2)  " B, CD" (note the blanks & comma)
%        3)  "EF"
%        4)  "GHI"
%
%      The quote characters themselves do not appear in the resultant
%      tokens.  The double quotes are delimiters I use here for
%      documentation purposes only.
%
%    o Escape character: A character which itself is ignored but which
%      causes the next character to be used as is.  ^ and \ are often used
%      as escape characters. An escape in the last position of the string
%      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
%      and non-escape) character. For example, assume white space, break
%      character, and quote are the same as in the above examples, and
%      further, assume that ^ is the escape character. Then, in the string
%
%        ABC, ' DEF ^' GH' I ^ J K^ L ^
%
%        ... there are 7 tokens:
%
%        1)  "ABC"
%        2)  " DEF ' GH"
%        3)  "I"
%        4)  " "     (a lone blank)
%        5)  "J"
%        6)  "K L"
%        7)  "^"     (passed as is at end of line)
%
%  The format of the Tokenizer method is:
%
%      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
%        const size_t max_token_length,const char *line,const char *white,
%        const char *break_set,const char *quote,const char escape,
%        char *breaker,int *next,char *quoted)
%
%  A description of each parameter follows:
%
%    o flag: right now, only the low order 2 bits are used.
%
%        1 => convert non-quoted tokens to upper case
%        2 => convert non-quoted tokens to lower case
%        0 => do not convert non-quoted tokens
%
%    o token: a character string containing the returned next token
%
%    o max_token_length: the maximum size of "token".  Characters beyond
%      "max_token_length" are truncated.
%
%    o line: the string to be parsed.
%
%    o white: a string of the valid white spaces.  example:
%
%        char whitesp[]={" \t"};
%
%      blank and tab will be valid white space.
%
%    o break_set: a string of the valid break characters. example:
%
%        char breakch[]={";,"};
%
%      semicolon and comma will be valid break characters.
%
%    o quote: a string of the valid quote characters. An example would be
%
%        char quotech[]={"'\""};
%
%      (this causes single and double quotes to be valid) Note that a
%      token starting with one of these characters needs the same quote
%      character to terminate it.
%
%      for example:
%
%        "ABC '
%
%      is unterminated, but
%
%        "DEF" and 'GHI'
%
%      are properly terminated.  Note that different quote characters
%      can appear on the same line; only for a given token do the quote
%      characters have to be the same.
%
%    o escape: the escape character (NOT a string ... only one
%      allowed). Use zero if none is desired.
%
%    o breaker: the break character used to terminate the current
%      token.  If the token was quoted, this will be the quote used.  If
%      the token is the last one on the line, this will be zero.
%
%    o next: this variable points to the first character of the
%      next token.  it gets reset by "tokenizer" as it steps through the
%      string.  Set it to 0 upon initialization, and leave it alone
%      after that.  You can change it if you want to jump around in the
%      string or re-parse from the beginning, but be careful.
%
%    o quoted: set to True if the token was quoted and MagickFalse
%      if not.  You may need this information (for example:  in C, a
%      string with quotes around it is a character string, while one
%      without is an identifier).
%
%    o result: 0 if we haven't reached EOS (end of string), and 1
%      if we have.
%
*/

/*
  Tokenizer() parser states, stored in TokenInfo.state.
*/
/* Initial state: no token character has been stored yet. */
#define IN_WHITE 0
/* Accumulating an unquoted token. */
#define IN_TOKEN 1
/* Accumulating a quoted token; white and break characters are stored. */
#define IN_QUOTE 2
/*
  The token body has ended (closing quote, or white space after an unquoted
  token); white space is skipped until a break character or the start of the
  next token.
*/
#define IN_OZONE 3

/*
  sindex() returns the offset of the first character in string equal to c,
  or -1 when there is none.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    offset;

  for (offset=0; string[offset] != '\0'; offset++)
  {
    if ((int) string[offset] == c)
      return(offset);
  }
  return(-1);
}

/*
  StoreToken() appends character c to string at token_info->offset and
  advances the offset.  Nothing is stored when the offset is negative or has
  reached max_token_length-1.  Outside of a quoted token the character is
  converted to upper case (flag bits == 1) or lower case (flag bits == 2).
*/
static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  char
    value;

  if (token_info->offset < 0)
    return;
  if ((size_t) token_info->offset >= (max_token_length-1))
    return;
  value=(char) c;
  if (token_info->state != IN_QUOTE)
    {
      /*
        Only the low two bits of the flag select a case conversion.
      */
      if ((token_info->flag & 0x03) == 1)
        value=(char) LocaleToUppercase(c);
      else
        if ((token_info->flag & 0x03) == 2)
          value=(char) LocaleToLowercase(c);
    }
  string[token_info->offset]=value;
  token_info->offset++;
}

MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
  char *token,const size_t max_token_length,const char *line,const char *white,
  const char *break_set,const char *quote,const char escape,char *breaker,
  int *next,char *quoted)
{
  int
    c;

  ssize_t
    i;

  /*
    Reset the outputs; return 1 when *next already addresses the end of
    line, in which case token is left untouched.
  */
  *breaker='\0';
  *quoted='\0';
  if (line[*next] == '\0')
    return(1);
  token_info->state=IN_WHITE;
  token_info->quote=(char) MagickFalse;
  token_info->flag=flag;
  /*
    Classify each character in priority order: break, quote, white, escape,
    ordinary.  Each class acts according to the current state.
  */
  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
  {
    c=(int) line[*next];
    i=sindex(c,break_set);
    if (i >= 0)
      {
        /*
          Break character: outside quotes it ends the token and is consumed;
          inside quotes it is stored as data.
        */
        switch (token_info->state)
        {
          case IN_WHITE:
          case IN_TOKEN:
          case IN_OZONE:
          {
            (*next)++;
            *breaker=break_set[i];
            token[token_info->offset]='\0';
            return(0);
          }
          case IN_QUOTE:
          {
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
        }
        continue;
      }
    i=sindex(c,quote);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            /*
              Opening quote: remember which quote character must close it.
            */
            token_info->state=IN_QUOTE;
            token_info->quote=quote[i];
            *quoted=(char) MagickTrue;
            break;
          }
          case IN_QUOTE:
          {
            /*
              A different quote character is data; the matching one closes
              the token body.
            */
            if (quote[i] != token_info->quote)
              StoreToken(token_info,token,max_token_length,c);
            else
              {
                token_info->state=IN_OZONE;
                token_info->quote='\0';
              }
            break;
          }
          case IN_TOKEN:
          case IN_OZONE:
          {
            /*
              A quote after a token ends it; *next is left on the quote so
              the next call starts the quoted token there.
            */
            *breaker=(char) c;
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    i=sindex(c,white);
    if (i >= 0)
      {
        /*
          White space: skipped before and after a token, ends an unquoted
          token body, and is stored inside quotes.
        */
        switch (token_info->state)
        {
          case IN_WHITE:
          case IN_OZONE:
            break;
          case IN_TOKEN:
          {
            token_info->state=IN_OZONE;
            break;
          }
          case IN_QUOTE:
          {
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
        }
        continue;
      }
    if (c == (int) escape)
      {
        /*
          An escape in the last position of the line is stored as an
          ordinary character and ends the token.
        */
        if (line[(*next)+1] == '\0')
          {
            *breaker='\0';
            StoreToken(token_info,token,max_token_length,c);
            (*next)++;
            token[token_info->offset]='\0';
            return(0);
          }
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            /*
              Start a token and step back one position so the loop increment
              revisits this escape character in the IN_TOKEN state.
            */
            (*next)--;
            token_info->state=IN_TOKEN;
            break;
          }
          case IN_TOKEN:
          case IN_QUOTE:
          {
            /*
              Drop the escape and store the following character verbatim,
              without classifying it.
            */
            (*next)++;
            c=(int) line[*next];
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
          case IN_OZONE:
          {
            /*
              The escape begins the next token; leave *next on it.
            */
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    /*
      Ordinary character: store it, or end the token without consuming the
      character when the token body has already finished.
    */
    switch (token_info->state)
    {
      case IN_WHITE:
      {
        token_info->state=IN_TOKEN;
        StoreToken(token_info,token,max_token_length,c);
        break;
      }
      case IN_TOKEN:
      case IN_QUOTE:
      {
        StoreToken(token_info,token,max_token_length,c);
        break;
      }
      case IN_OZONE:
      {
        token[token_info->offset]='\0';
        return(0);
      }
    }
  }
  /*
    End of line reached while scanning: terminate whatever was collected.
  */
  token[token_info->offset]='\0';
  return(0);
}