mirror of
https://github.com/ImageMagick/ImageMagick.git
synced 2026-05-25 11:24:54 +02:00
1069 lines
32 KiB
C
1069 lines
32 KiB
C
/*
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% %
|
|
% %
|
|
% %
|
|
% TTTTT OOO K K EEEEE N N %
|
|
% T O O K K E NN N %
|
|
% T O O KKK EEE N N N %
|
|
% T O O K K E N NN %
|
|
% T OOO K K EEEEE N N %
|
|
% %
|
|
% %
|
|
% MagickCore Token Methods %
|
|
% %
|
|
% Software Design %
|
|
% Cristy %
|
|
% January 1993 %
|
|
% %
|
|
% %
|
|
% Copyright @ 1999 ImageMagick Studio LLC, a non-profit organization %
|
|
% dedicated to making software imaging solutions freely available. %
|
|
% %
|
|
% You may not use this file except in compliance with the License. You may %
|
|
% obtain a copy of the License at %
|
|
% %
|
|
% https://imagemagick.org/license/ %
|
|
% %
|
|
% Unless required by applicable law or agreed to in writing, software %
|
|
% distributed under the License is distributed on an "AS IS" BASIS, %
|
|
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
|
|
% See the License for the specific language governing permissions and %
|
|
% limitations under the License. %
|
|
% %
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%
|
|
%
|
|
%
|
|
*/
|
|
|
|
/*
|
|
Include declarations.
|
|
*/
|
|
#include "MagickCore/studio.h"
|
|
#include "MagickCore/exception.h"
|
|
#include "MagickCore/exception-private.h"
|
|
#include "MagickCore/image.h"
|
|
#include "MagickCore/image-private.h"
|
|
#include "MagickCore/locale-private.h"
|
|
#include "MagickCore/memory_.h"
|
|
#include "MagickCore/memory-private.h"
|
|
#include "MagickCore/string_.h"
|
|
#include "MagickCore/string-private.h"
|
|
#include "MagickCore/token.h"
|
|
#include "MagickCore/token-private.h"
|
|
#include "MagickCore/utility.h"
|
|
#include "MagickCore/utility-private.h"
|
|
|
|
/*
|
|
Typedef declarations.
|
|
*/
|
|
struct _TokenInfo
|
|
{
|
|
int
|
|
state;
|
|
|
|
MagickStatusType
|
|
flag;
|
|
|
|
ssize_t
|
|
offset;
|
|
|
|
char
|
|
quote;
|
|
|
|
size_t
|
|
signature;
|
|
};
|
|
|
|
/*
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% %
|
|
% %
|
|
% %
|
|
% A c q u i r e T o k e n I n f o %
|
|
% %
|
|
% %
|
|
% %
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%
|
|
% AcquireTokenInfo() allocates the TokenInfo structure.
|
|
%
|
|
% The format of the AcquireTokenInfo method is:
|
|
%
|
|
% TokenInfo *AcquireTokenInfo()
|
|
%
|
|
*/
|
|
/*
  Allocate a TokenInfo structure, clear every field, and stamp it with the
  MagickCore signature that DestroyTokenInfo() asserts on.

  NOTE(review): the result of AcquireCriticalMemory() is used without a NULL
  check, so it is presumably guaranteed not to return NULL -- confirm.
*/
MagickExport TokenInfo *AcquireTokenInfo(void)
{
  TokenInfo
    *token_info;

  token_info=(TokenInfo *) AcquireCriticalMemory(sizeof(*token_info));
  /*
    Zero the parser state (state, flag, offset, quote) so that no field holds
    an indeterminate value before the first call to Tokenizer().
  */
  (void) memset(token_info,0,sizeof(*token_info));
  token_info->signature=MagickCoreSignature;
  return(token_info);
}
|
|
|
|
/*
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% %
|
|
% %
|
|
% %
|
|
% D e s t r o y T o k e n I n f o %
|
|
% %
|
|
% %
|
|
% %
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%
|
|
% DestroyTokenInfo() deallocates memory associated with a TokenInfo
|
|
% structure.
|
|
%
|
|
% The format of the DestroyTokenInfo method is:
|
|
%
|
|
% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
|
|
%
|
|
% A description of each parameter follows:
|
|
%
|
|
% o token_info: Specifies a pointer to a TokenInfo structure.
|
|
%
|
|
*/
|
|
/*
  Validate the structure, invalidate its signature so stale pointers are
  caught by later signature asserts, release the memory, and hand back
  whatever RelinquishMagickMemory() returns.
*/
MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
{
  assert(token_info != (TokenInfo *) NULL);
  assert(token_info->signature == MagickCoreSignature);
  if (IsEventLogging() != MagickFalse)
    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
  token_info->signature=(~MagickCoreSignature);
  return((TokenInfo *) RelinquishMagickMemory(token_info));
}
|
|
|
|
/*
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% %
|
|
% %
|
|
% %
|
|
+ G e t N e x t T o k e n %
|
|
% %
|
|
% %
|
|
% %
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%
|
|
% GetNextToken() gets a token from the token stream. A token is defined as
|
|
% a sequence of characters delimited by whitespace (e.g. clip-path), a
|
|
% sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
|
|
% parenthesis (e.g. rgb(0,0,0)). GetNextToken() also recognizes these
|
|
% separator characters: ':', '=', ',', and ';'. GetNextToken() returns the
|
|
% length of the consumed token.
|
|
%
|
|
% The format of the GetNextToken method is:
|
|
%
|
|
% size_t GetNextToken(const char *magick_restrict start,
|
|
% const char **magick_restrict end,const size_t extent,
|
|
% char *magick_restrict token)
|
|
%
|
|
% A description of each parameter follows:
|
|
%
|
|
% o start: the start of the token sequence.
|
|
%
|
|
% o end: point to the end of the token sequence.
|
|
%
|
|
% o extent: maximum extent of the token.
|
|
%
|
|
% o token: copy the token to this buffer.
|
|
%
|
|
*/
|
|
MagickExport magick_hot_spot size_t GetNextToken(
|
|
const char *magick_restrict start,const char **magick_restrict end,
|
|
const size_t extent,char *magick_restrict token)
|
|
{
|
|
char
|
|
*magick_restrict q;
|
|
|
|
const char
|
|
*magick_restrict p;
|
|
|
|
double
|
|
value;
|
|
|
|
ssize_t
|
|
i;
|
|
|
|
assert(start != (const char *) NULL);
|
|
assert(token != (char *) NULL);
|
|
i=0;
|
|
p=start;
|
|
while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
|
|
p++;
|
|
switch (*p)
|
|
{
|
|
case '\0':
|
|
break;
|
|
case '"':
|
|
case '\'':
|
|
case '`':
|
|
case '{':
|
|
{
|
|
char
|
|
escape;
|
|
|
|
switch (*p)
|
|
{
|
|
case '"': escape='"'; break;
|
|
case '\'': escape='\''; break;
|
|
case '`': escape='\''; break;
|
|
case '{': escape='}'; break;
|
|
default: escape=(*p); break;
|
|
}
|
|
for (p++; *p != '\0'; p++)
|
|
{
|
|
if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
|
|
p++;
|
|
else
|
|
if (*p == escape)
|
|
{
|
|
p++;
|
|
break;
|
|
}
|
|
if (i < (ssize_t) (extent-1))
|
|
token[i++]=(*p);
|
|
if ((size_t) (p-start) >= (extent-1))
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
case '/':
|
|
{
|
|
if (i < (ssize_t) (extent-1))
|
|
token[i++]=(*p);
|
|
p++;
|
|
if ((*p == '>') || (*p == '/'))
|
|
{
|
|
if (i < (ssize_t) (extent-1))
|
|
token[i++]=(*p);
|
|
p++;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
value=StringToDouble(p,&q);
|
|
(void) value;
|
|
if ((p != q) && (*p != ','))
|
|
{
|
|
for ( ; (p < q) && (*p != ','); p++)
|
|
{
|
|
if (i < (ssize_t) (extent-1))
|
|
token[i++]=(*p);
|
|
if ((size_t) (p-start) >= (extent-1))
|
|
break;
|
|
}
|
|
if (*p == '%')
|
|
{
|
|
if (i < (ssize_t) (extent-1))
|
|
token[i++]=(*p);
|
|
p++;
|
|
}
|
|
break;
|
|
}
|
|
if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
|
|
(*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
|
|
{
|
|
if (i < (ssize_t) (extent-1))
|
|
token[i++]=(*p);
|
|
p++;
|
|
break;
|
|
}
|
|
for ( ; *p != '\0'; p++)
|
|
{
|
|
if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
|
|
(*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
|
|
break;
|
|
if ((i > 0) && (*p == '<'))
|
|
break;
|
|
if (i < (ssize_t) (extent-1))
|
|
token[i++]=(*p);
|
|
if (*p == '>')
|
|
break;
|
|
if (*p == '(')
|
|
{
|
|
for (p++; *p != '\0'; p++)
|
|
{
|
|
if (i < (ssize_t) (extent-1))
|
|
token[i++]=(*p);
|
|
if ((*p == ')') && (*(p-1) != '\\'))
|
|
break;
|
|
if ((size_t) (p-start) >= (extent-1))
|
|
break;
|
|
}
|
|
if (*p == '\0')
|
|
break;
|
|
}
|
|
if ((size_t) (p-start) >= (extent-1))
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
token[i]='\0';
|
|
if (LocaleNCompare(token,"url(#",5) == 0)
|
|
{
|
|
q=strrchr(token,')');
|
|
if (q != (char *) NULL)
|
|
{
|
|
*q='\0';
|
|
(void) memmove(token,token+5,(size_t) (q-token-4));
|
|
}
|
|
}
|
|
while (isspace((int) ((unsigned char) *p)) != 0)
|
|
p++;
|
|
if (end != (const char **) NULL)
|
|
*end=(const char *) p;
|
|
return((size_t) (p-start+1));
|
|
}
|
|
|
|
/*
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% %
|
|
% %
|
|
% %
|
|
% G l o b E x p r e s s i o n %
|
|
% %
|
|
% %
|
|
% %
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%
|
|
% GlobExpression() returns MagickTrue if the expression matches the pattern.
|
|
%
|
|
% The format of the GlobExpression function is:
|
|
%
|
|
% MagickBooleanType GlobExpression(const char *magick_restrict expression,
|
|
% const char *magick_restrict pattern,
|
|
% const MagickBooleanType case_insensitive)
|
|
%
|
|
% A description of each parameter follows:
|
|
%
|
|
% o expression: Specifies a pointer to a text string containing a file name.
|
|
%
|
|
% o pattern: Specifies a pointer to a text string containing a pattern.
|
|
%
|
|
% o case_insensitive: set to MagickTrue to ignore the case when matching
|
|
% an expression.
|
|
%
|
|
*/
|
|
|
|
/*
  GlobExpression_() is the recursive worker behind GlobExpression().

  Pattern syntax handled here:
    *        any run of characters (including none)
    ?        exactly one character
    [...]    one character from a set; supports ranges (a-z) and backslash
             escapes; compared code point to code point (case_insensitive is
             not applied to sets)
    {a,b}    alternatives; each alternative is joined with the text after
             the closing brace and matched recursively
    \c       the literal character c (not on native Windows builds)

  A NULL or empty pattern, or a lone '*', always matches.  `depth` counts
  recursion levels; once it exceeds MagickMaxRecursionDepth, errno is set to
  EOVERFLOW and MagickFalse is returned.
*/
static MagickBooleanType GlobExpression_(const char *magick_restrict expression,
  const char *magick_restrict pattern,const MagickBooleanType case_insensitive,
  const size_t depth)
{
  if (depth > MagickMaxRecursionDepth)
    {
      errno=EOVERFLOW;
      return(MagickFalse);
    }
  /*
    Empty pattern or single '*' always matches.
  */
  if (pattern == (const char *) NULL)
    return(MagickTrue);
  if (GetUTFCode(pattern) == 0)
    return(MagickTrue);
  if ((GetUTFCode(pattern) == '*') &&
      (GetUTFCode(pattern+GetUTFOctets(pattern)) == 0))
    return(MagickTrue);
  if ((strchr(pattern,'{') == NULL) &&
      (strchr(pattern,'*') == NULL) &&
      (strchr(pattern,'?') == NULL))
    {
      char
        path[MagickPathExtent]= { 0 };

      /*
        If no glob characters exist, ensure no subimage specifier.
      */
      GetPathComponent(pattern,SubimagePath,path);
      if (*path != '\0')
        return(MagickFalse);
    }
  while (GetUTFCode(pattern) != 0)
  {
    int
      ecode = GetUTFCode(expression),
      pcode = GetUTFCode(pattern);

    /*
      Only '*' and '{' can still match once the expression is exhausted.
    */
    if ((ecode == 0) && (pcode != '*') && (pcode != '{'))
      break;
    switch (pcode)
    {
      case '*':
      {
        do
        {
          /*
            Skip consecutive '*'.
          */
          pattern+=GetUTFOctets(pattern);
        }
        while (GetUTFCode(pattern) == '*');
        while (1)
        {
          /*
            Try to match the rest of the pattern at each position.
          */
          if (GlobExpression_(expression,pattern,case_insensitive,depth+1) != MagickFalse)
            return(MagickTrue);
          if (GetUTFCode(expression) == 0)
            break;
          expression+=GetUTFOctets(expression);
        }
        return(MagickFalse);
      }
      case '?':
      {
        if (ecode == 0)
          return(MagickFalse);
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
      case '[':
      {
        const char
          *p = pattern+GetUTFOctets(pattern),
          *q = pattern+GetUTFOctets(pattern);

        MagickBooleanType
          matched = MagickFalse;

        if (ecode == 0)
          return(MagickFalse);
        /*
          Locate the closing ']'.
        */
        while ((GetUTFCode(q) != 0) && (GetUTFCode(q) != ']'))
          q+=GetUTFOctets(q);
        if (GetUTFCode(q) == 0)
          return(MagickFalse);  /* malformed */
        /*
          Test the expression character against every member of the set.
        */
        while (p < q)
        {
          const char
            *next;

          int
            code = GetUTFCode(p);

          size_t
            octets = GetUTFOctets(p);

          if (code == '\\')
            {
              p+=octets;
              code=GetUTFCode(p);
              octets=GetUTFOctets(p);
            }
          next=p+octets;
          if ((next < q) && (GetUTFCode(next) == '-'))
            {
              int
                ncode;

              /*
                Range: code '-' ncode, the upper bound may be escaped.
              */
              next+=GetUTFOctets(next);
              ncode=GetUTFCode(next);
              if (ncode == '\\')
                {
                  next+=GetUTFOctets(next);
                  ncode=GetUTFCode(next);
                }
              if ((ecode >= code) && (ecode <= ncode))
                matched=MagickTrue;
              p=next+GetUTFOctets(next);
            }
          else
            {
              if (ecode == code)
                matched=MagickTrue;
              p+=octets;
            }
        }
        /*
          Fail when no member of the set matched.
        */
        if (matched == MagickFalse)
          return(MagickFalse);
        pattern=q+GetUTFOctets(q);  /* skip ']' */
        expression+=GetUTFOctets(expression);
        break;
      }
      case '{':
      {
        char
          *a,
          *alternative;

        const char
          *p,
          *q;

        size_t
          remaining = MagickPathExtent;

        pattern+=GetUTFOctets(pattern);  /* Skip '{' */
        if (GetUTFCode(pattern) == 0)
          return(MagickFalse);
        /*
          Locate the closing '}'; q is the pattern text that follows it.
        */
        p=pattern;
        while ((GetUTFCode(p) != 0) && (GetUTFCode(p) != '}'))
        {
#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
          if (GetUTFCode(p) == '\\')
            {
              p+=GetUTFOctets(p);
              if (GetUTFCode(p) == 0)
                break;
            }
#endif
          p+=GetUTFOctets(p);
        }
        if (GetUTFCode(p) != '}')
          return(MagickFalse);  /* malformed */
        q=p+GetUTFOctets(p);
        alternative=AcquireString(pattern);
        a=alternative;
        while (1)
        {
          int
            code = GetUTFCode(pattern);

          size_t
            octets;

          if ((code == 0) || (code == ',') || (code == '}'))
            {
              char
                *subpattern;

              MagickBooleanType
                match;

              /*
                Try alternative as a full sub-pattern: alternative + rest.
              */
              *a='\0';
              subpattern=AcquireString(alternative);
              if (ConcatenateString(&subpattern,q) == MagickFalse)
                {
                  subpattern=DestroyString(subpattern);
                  alternative=DestroyString(alternative);
                  return(MagickFalse);
                }
              match=GlobExpression_(expression,subpattern,case_insensitive,
                depth+1);
              subpattern=DestroyString(subpattern);
              if (match != MagickFalse)
                {
                  alternative=DestroyString(alternative);
                  return(MagickTrue);
                }
              /*
                Reset buffer for next alternative.
              */
              a=alternative;
              remaining=MagickPathExtent;
              if (code == ',')
                {
                  pattern+=GetUTFOctets(pattern);  /* skip ',' */
                  continue;
                }
              break;  /* '}' or end */
            }
          /*
            Copy UTF-8 sequence into alternative.
          */
          octets=GetUTFOctets(pattern);
          if ((octets == 0) || (octets >= remaining))
            break;
          (void) memcpy(a,pattern,octets);
          a+=octets;
          remaining-=octets;
          pattern+=octets;
        }
        alternative=DestroyString(alternative);
        return(MagickFalse);
      }
#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
      case '\\':
      {
        /*
          Escape: step over the backslash and match the next character
          literally in the default case.
        */
        pattern+=GetUTFOctets(pattern);
        if (GetUTFCode(pattern) == 0)
          return(MagickFalse);
        magick_fallthrough;
      }
#endif
      default:
      {
        /*
          Re-read the pattern code here: after falling through from the
          escape case, pcode still holds the backslash, not the escaped
          character we must compare against.
        */
        int
          ec = ecode,
          pc = GetUTFCode(pattern);

        if (ecode == 0)
          return(MagickFalse);
        if (case_insensitive != MagickFalse)
          {
            pc=LocaleToLowercase(pc);
            ec=LocaleToLowercase(ec);
          }
        if (pc != ec)
          return(MagickFalse);
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
    }
  }
  /*
    Trailing '*' match the empty string; both strings must now be exhausted.
  */
  while (GetUTFCode(pattern) == '*')
    pattern+=GetUTFOctets(pattern);
  return(((GetUTFCode(expression) == 0) &&
    (GetUTFCode(pattern) == 0)) ? MagickTrue : MagickFalse);
}
|
|
|
|
/*
  Public entry point: run the recursive matcher starting at depth zero.
*/
MagickExport MagickBooleanType GlobExpression(
  const char *magick_restrict expression,const char *magick_restrict pattern,
  const MagickBooleanType case_insensitive)
{
  const size_t
    initial_depth = 0;

  return(GlobExpression_(expression,pattern,case_insensitive,initial_depth));
}
|
|
|
|
/*
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% %
|
|
% %
|
|
% %
|
|
+ I s G l o b %
|
|
% %
|
|
% %
|
|
% %
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%
|
|
% IsGlob() returns MagickTrue if the path specification contains a globbing
|
|
% pattern.
|
|
%
|
|
% The format of the IsGlob method is:
|
|
%
|
|
% MagickBooleanType IsGlob(const char *path)
|
|
%
|
|
% A description of each parameter follows:
|
|
%
|
|
% o path: the path.
|
|
%
|
|
*/
|
|
/*
  Return MagickTrue when `path` is not an accessible path and contains at
  least one of the globbing characters * ? { } [ ].  A path that
  IsPathAccessible() accepts is never treated as a glob, whatever
  characters it contains.
*/
MagickPrivate MagickBooleanType IsGlob(const char *path)
{
  if (IsPathAccessible(path) != MagickFalse)
    return(MagickFalse);
  /*
    strpbrk() stops at the first globbing character instead of scanning the
    remainder of the string.
  */
  return(strpbrk(path,"*?{}[]") != (char *) NULL ? MagickTrue : MagickFalse);
}
|
|
|
|
/*
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% %
|
|
% %
|
|
% %
|
|
% T o k e n i z e r %
|
|
% %
|
|
% %
|
|
% %
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%
|
|
% Tokenizer() is a generalized, finite state token parser. It extracts tokens
|
|
% one at a time from a string of characters. The characters used for white
|
|
% space, for break characters, and for quotes can be specified. Also,
|
|
% characters in the string can be preceded by a specifiable escape character
|
|
% which removes any special meaning the character may have.
|
|
%
|
|
% Here is some terminology:
|
|
%
|
|
% o token: A single unit of information in the form of a group of
|
|
% characters.
|
|
%
|
|
% o white space: Space that gets ignored (except within quotes or when
|
|
% escaped), like blanks and tabs. in addition, white space terminates a
|
|
% non-quoted token.
|
|
%
|
|
% o break set: One or more characters that separates non-quoted tokens.
|
|
% Commas are a common break character. The usage of break characters to
|
|
% signal the end of a token is the same as that of white space, except
|
|
% multiple break characters with nothing or only white space between
|
|
% generate a null token for each two break characters together.
|
|
%
|
|
% For example, if blank is set to be the white space and comma is set to
|
|
% be the break character, the line
|
|
%
|
|
% A, B, C , , DEF
|
|
%
|
|
% ... consists of 5 tokens:
|
|
%
|
|
% 1) "A"
|
|
% 2) "B"
|
|
% 3) "C"
|
|
% 4) "" (the null string)
|
|
% 5) "DEF"
|
|
%
|
|
% o Quote character: A character that, when surrounding a group of other
|
|
% characters, causes the group of characters to be treated as a single
|
|
% token, no matter how many white spaces or break characters exist in
|
|
% the group. Also, a token always terminates after the closing quote.
|
|
% For example, if ' is the quote character, blank is white space, and
|
|
% comma is the break character, the following string
|
|
%
|
|
% A, ' B, CD'EF GHI
|
|
%
|
|
% ... consists of 4 tokens:
|
|
%
|
|
% 1) "A"
|
|
% 2) " B, CD" (note the blanks & comma)
|
|
% 3) "EF"
|
|
% 4) "GHI"
|
|
%
|
|
% The quote characters themselves do not appear in the resultant
|
|
% tokens. The double quotes are delimiters i use here for
|
|
% documentation purposes only.
|
|
%
|
|
% o Escape character: A character which itself is ignored but which
|
|
% causes the next character to be used as is. ^ and \ are often used
|
|
% as escape characters. An escape in the last position of the string
|
|
% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
|
|
% and non-escape) character. For example, assume white space, break
|
|
% character, and quote are the same as in the above examples, and
|
|
% further, assume that ^ is the escape character. Then, in the string
|
|
%
|
|
% ABC, ' DEF ^' GH' I ^ J K^ L ^
|
|
%
|
|
% ... there are 7 tokens:
|
|
%
|
|
% 1) "ABC"
|
|
% 2) " DEF ' GH"
|
|
% 3) "I"
|
|
% 4) " " (a lone blank)
|
|
% 5) "J"
|
|
% 6) "K L"
|
|
% 7) "^" (passed as is at end of line)
|
|
%
|
|
% The format of the Tokenizer method is:
|
|
%
|
|
% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
|
|
% const size_t max_token_length,const char *line,const char *white,
|
|
% const char *break_set,const char *quote,const char escape,
|
|
% char *breaker,int *next,char *quoted)
|
|
%
|
|
% A description of each parameter follows:
|
|
%
|
|
% o flag: right now, only the low order 2 bits are used.
|
|
%
|
|
% 1 => convert non-quoted tokens to upper case
|
|
% 2 => convert non-quoted tokens to lower case
|
|
% 0 => do not convert non-quoted tokens
|
|
%
|
|
% o token: a character string containing the returned next token
|
|
%
|
|
% o max_token_length: the maximum size of "token". Characters beyond
|
|
% "max_token_length" are truncated.
|
|
%
|
|
% o line: the string to be parsed.
|
|
%
|
|
% o white: a string of the valid white spaces. example:
|
|
%
|
|
% char whitesp[]={" \t"};
|
|
%
|
|
% blank and tab will be valid white space.
|
|
%
|
|
% o break_set: a string of the valid break characters. example:
|
|
%
|
|
% char breakch[]={";,"};
|
|
%
|
|
% semicolon and comma will be valid break characters.
|
|
%
|
|
% o quote: a string of the valid quote characters. An example would be
|
|
%
|
|
% char quotech[]={"'\""};
|
|
%
|
|
% (this causes single and double quotes to be valid) Note that a
|
|
% token starting with one of these characters needs the same quote
|
|
% character to terminate it.
|
|
%
|
|
% for example:
|
|
%
|
|
% "ABC '
|
|
%
|
|
% is unterminated, but
|
|
%
|
|
% "DEF" and 'GHI'
|
|
%
|
|
% are properly terminated. Note that different quote characters
|
|
% can appear on the same line; only for a given token do the quote
|
|
% characters have to be the same.
|
|
%
|
|
% o escape: the escape character (NOT a string ... only one
|
|
% allowed). Use zero if none is desired.
|
|
%
|
|
% o breaker: the break character used to terminate the current
|
|
% token. If the token was quoted, this will be the quote used. If
|
|
% the token is the last one on the line, this will be zero.
|
|
%
|
|
% o next: this variable points to the first character of the
|
|
% next token. it gets reset by "tokenizer" as it steps through the
|
|
% string. Set it to 0 upon initialization, and leave it alone
|
|
% after that. You can change it if you want to jump around in the
|
|
% string or re-parse from the beginning, but be careful.
|
|
%
|
|
% o quoted: set to True if the token was quoted and MagickFalse
|
|
% if not. You may need this information (for example: in C, a
|
|
% string with quotes around it is a character string, while one
|
|
% without is an identifier).
|
|
%
|
|
% o result: 0 if we haven't reached EOS (end of string), and 1
|
|
% if we have.
|
|
%
|
|
*/
|
|
|
|
/*
  Tokenizer() parser states.
*/
#define IN_WHITE  0  /* nothing of the current token has been seen yet */
#define IN_TOKEN  1  /* inside an unquoted token */
#define IN_QUOTE  2  /* inside a quoted token */
#define IN_OZONE  3  /* token finished (closing quote or trailing white space) */
|
|
|
|
/*
  Return the index of the first occurrence of character `c` in `string`, or
  -1 when it does not occur.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
|
|
|
|
/*
  Append character `c` to the token buffer `string` at token_info->offset
  and advance the offset.  The character is dropped when the offset is
  negative or has reached max_token_length-1.  Outside of quotes the low two
  bits of token_info->flag select case conversion: 1 stores the upper-case
  form, 2 the lower-case form, anything else stores the character unchanged.
*/
static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  char
    stored;

  ssize_t
    slot;

  if (token_info->offset < 0)
    return;
  if ((size_t) token_info->offset >= (max_token_length-1))
    return;
  slot=token_info->offset++;
  stored=(char) c;
  if (token_info->state != IN_QUOTE)
    {
      const MagickStatusType
        conversion = token_info->flag & 0x03;

      if (conversion == 1)
        stored=(char) LocaleToUppercase(c);
      else
        if (conversion == 2)
          stored=(char) LocaleToLowercase(c);
    }
  string[slot]=stored;
}
|
|
|
|
/*
  Extract the next token of `line`, starting at index *next, into `token`.
  Returns 1 when *next already points at the end of the line and 0
  otherwise.  *breaker receives the break or quote character that ended the
  token (NUL if none) and *quoted is set to MagickTrue when the token was
  opened by a quote character.
*/
MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
  char *token,const size_t max_token_length,const char *line,const char *white,
  const char *break_set,const char *quote,const char escape,char *breaker,
  int *next,char *quoted)
{
  int
    ch;

  ssize_t
    match;

  *breaker='\0';
  *quoted='\0';
  if (line[*next] == '\0')
    return(1);
  token_info->state=IN_WHITE;
  token_info->quote=(char) MagickFalse;
  token_info->flag=flag;
  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
  {
    ch=(int) line[*next];
    /*
      Break character: literal inside quotes, otherwise it ends the token
      and is consumed.
    */
    match=sindex(ch,break_set);
    if (match >= 0)
      {
        if (token_info->state == IN_QUOTE)
          {
            StoreToken(token_info,token,max_token_length,ch);
            continue;
          }
        (*next)++;
        *breaker=break_set[match];
        token[token_info->offset]='\0';
        return(0);
      }
    /*
      Quote character: opens a quoted token, closes the one it opened, is
      literal inside a token opened by a different quote, and otherwise ends
      the current token without being consumed.
    */
    match=sindex(ch,quote);
    if (match >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            token_info->state=IN_QUOTE;
            token_info->quote=quote[match];
            *quoted=(char) MagickTrue;
            break;
          }
          case IN_QUOTE:
          {
            if (quote[match] == token_info->quote)
              {
                token_info->state=IN_OZONE;
                token_info->quote='\0';
              }
            else
              StoreToken(token_info,token,max_token_length,ch);
            break;
          }
          case IN_TOKEN:
          case IN_OZONE:
          {
            *breaker=(char) ch;
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    /*
      White space: skipped between tokens, ends an unquoted token, and is
      literal inside quotes.
    */
    match=sindex(ch,white);
    if (match >= 0)
      {
        if (token_info->state == IN_TOKEN)
          token_info->state=IN_OZONE;
        else
          if (token_info->state == IN_QUOTE)
            StoreToken(token_info,token,max_token_length,ch);
        continue;
      }
    if (ch == (int) escape)
      {
        /*
          An escape in the last position of the line is stored as is.
        */
        if (line[(*next)+1] == '\0')
          {
            *breaker='\0';
            StoreToken(token_info,token,max_token_length,ch);
            (*next)++;
            token[token_info->offset]='\0';
            return(0);
          }
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            /*
              Start a token and step back so the escape is seen again, this
              time in the IN_TOKEN state.
            */
            (*next)--;
            token_info->state=IN_TOKEN;
            break;
          }
          case IN_TOKEN:
          case IN_QUOTE:
          {
            /*
              Store the character after the escape verbatim.
            */
            (*next)++;
            ch=(int) line[*next];
            StoreToken(token_info,token,max_token_length,ch);
            break;
          }
          case IN_OZONE:
          {
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    /*
      Ordinary character: ends a finished token, otherwise it is stored
      (starting a new token first if none is open).
    */
    if (token_info->state == IN_OZONE)
      {
        token[token_info->offset]='\0';
        return(0);
      }
    if (token_info->state == IN_WHITE)
      token_info->state=IN_TOKEN;
    StoreToken(token_info,token,max_token_length,ch);
  }
  token[token_info->offset]='\0';
  return(0);
}
|