1: <?php
2:
3: namespace Alchemy\util;
4:
5:
/**
 * Parse a data type definition into its various components.
 *
 * For example:
 * >>> $type = new DataTypeLexer("Integer(11, primary_key = true)");
 * >>> $type->getType();
 * "Integer"
 * >>> $type->getArgs();
 * array(11, "primary_key" => true)
 *
 * Square brackets group positional values into a nested array, e.g.
 * "Enum([1, 2], 3)" yields array(array(1, 2), 3).
 */
class DataTypeLexer {
    const T_EQUALS = '=';

    // Not referenced by any method of this class; kept so the object's
    // property layout is unchanged.
    private $index = 0;
    private $definition;
    private $type;
    private $args;


    /**
     * Object Constructor. Stores the definition and parses it immediately.
     *
     * @param string $def Data Type Definition
     * @throws \Exception when the definition contains unmatched brackets
     */
    public function __construct($def) {
        $this->definition = $def;
        $this->parse($def);
    }


    /**
     * Get the arguments in the definition. Positional arguments are stored
     * under integer keys, "name = value" arguments under their name.
     *
     * @return array
     */
    public function getArgs() {
        return $this->args;
    }


    /**
     * Get the Data Type from the definition (the first token of the
     * definition, or null when the definition produced no tokens).
     *
     * @return string
     */
    public function getType() {
        return $this->type;
    }


    /**
     * Lexically analyze the given string into a flat stream of tokens.
     *
     * Runs of [a-zA-Z0-9_-.\] and quoted literals are accumulated into a
     * buffer and emitted as one normalized value; whitespace outside quotes
     * is dropped; every other character is emitted as a one-character
     * control token. Quoted values still pass through normalizeToken(), so
     * "11" yields the integer 11.
     *
     * @param string $def
     * @return array
     */
    protected function lexString($def) {
        $def = (string) $def;
        $length = strlen($def);
        $tokens = array();
        $buffer = "";
        $quote = null;      // Quote character that opened the current string, or null
        $pending = false;   // True once a quoted literal was seen, so that "" is not lost

        for ($i = 0; $i < $length; $i++) {
            $char = $def[$i];

            if ($quote !== null) {
                if ($char === "\\") {
                    // Escape character: take the following character literally
                    $i++;
                    if ($i < $length) {
                        $buffer .= $def[$i];
                    }
                } elseif ($char === $quote) {
                    // Only the quote that opened the string may close it,
                    // so "it's" keeps its apostrophe
                    $quote = null;
                } else {
                    $buffer .= $char;
                }
                continue;
            }

            // Start String
            if ($char === '"' || $char === "'") {
                $quote = $char;
                $pending = true;
                continue;
            }

            // Push value onto buffer
            if (preg_match("/[a-zA-Z0-9_\-.\\\\]/", $char)) {
                $buffer .= $char;
                continue;
            }

            // Whitespace outside of a string carries no meaning. trim() also
            // covers tabs and newlines, which would otherwise become tokens.
            if (trim($char) === '') {
                continue;
            }

            // Reached a control char. Record the token and reset the buffer
            if ($pending || strlen($buffer) > 0) {
                $tokens[] = $this->normalizeToken($buffer);
                $buffer = "";
                $pending = false;
            }

            $tokens[] = $char;
        }

        // Save last buffer (an unterminated string is accepted as-is)
        if ($pending || strlen($buffer) > 0) {
            $tokens[] = $this->normalizeToken($buffer);
        }

        return $tokens;
    }


    /**
     * Normalize the given token into PHP values: 'true' / 'false' become
     * booleans, integer-like strings become int, other numeric strings
     * become float, anything else is returned unchanged.
     *
     * @param string $token
     * @return mixed
     */
    protected function normalizeToken($token) {
        if ($token === 'true') {
            return true;
        }

        if ($token === 'false') {
            return false;
        }

        // Integer? The loose comparison is intentional: it is a numeric
        // comparison when both sides are numeric strings.
        if ((string)(int)$token == $token) {
            return (int)$token;
        }

        // Float?
        if (is_numeric($token)) {
            return (float)$token;
        }

        // Just a string.
        return $token;
    }


    /**
     * Lex and parse the given definition, saving its data into the object.
     *
     * The first token becomes the type. "(", ")" and "," are ignored,
     * "[" ... "]" collect values into a nested array, and "name = value"
     * stores value under the key name.
     *
     * All token comparisons are strict: tokens may be booleans or numbers,
     * and loose comparison would treat boolean true as equal to "[" or "=".
     *
     * @param string $def
     * @throws \Exception when "[" and "]" are not balanced
     */
    protected function parse($def) {
        $tokens = $this->lexString($def);
        $this->type = array_shift($tokens);
        $stack = array(array());

        // Loop on the remaining count, not on token truthiness, so that
        // falsy values such as 0, false or "" do not end parsing early.
        while (count($tokens) > 0) {
            $token = array_shift($tokens);

            // Ignore controls
            if ($token === '(' || $token === ')' || $token === ',') {
                continue;
            }

            // array stack control
            if ($token === '[') {
                array_unshift($stack, array());
                continue;
            }

            if ($token === ']') {
                // A closing bracket needs an open frame plus its parent
                if (count($stack) < 2) {
                    throw new \Exception("Definition '$def' contains unmatched [ brackets ].");
                }

                $top = array_shift($stack);
                $stack[0][] = $top;
                continue;
            }

            if (reset($tokens) === static::T_EQUALS) {
                array_shift($tokens); // Swallow the assignment
                $value = array_shift($tokens);
                $stack[0][$token] = $value;
            } else {
                $stack[0][] = $token;
            }
        }

        if (count($stack) != 1) {
            throw new \Exception("Definition '$def' contains unmatched [ brackets ].");
        }

        $this->args = array_shift($stack);
    }
}
182: