00001
00002
00003
00004
00005
00006 #include "eparser.h"
00007
00008 #include <cstring>
00009 #include <cmath>
00010
00011 #include "lexer.h"
00012 #include "common.h"
00013
00014 #include "logic.h"
00015 #include "data.h"
00016 #include "fit.h"
00017 #include "var.h"
00018 #include "func.h"
00019 #include "model.h"
00020
00021 using namespace std;
00022
00023 namespace {
00024
00025 int get_op_priority(int op)
00026 {
00027 switch (op) {
00028 case OP_POW: return 9;
00029 case OP_NEG: return 8;
00030 case OP_MUL: return 7;
00031 case OP_DIV: return 7;
00032 case OP_ADD: return 6;
00033 case OP_SUB: return 6;
00034 case OP_GT: return 5;
00035 case OP_GE: return 5;
00036 case OP_LT: return 5;
00037 case OP_LE: return 5;
00038 case OP_EQ: return 5;
00039 case OP_NEQ: return 5;
00040 case OP_NOT: return 4;
00041 case OP_AFTER_AND: return 3;
00042 case OP_AFTER_OR: return 2;
00043 case OP_TERNARY_MID: return 1;
00044 case OP_AFTER_TERNARY: return 1;
00045 default: return 0;
00046 }
00047 }
00048
00049 const char* function_name(int op)
00050 {
00051 switch (op) {
00052
00053 case OP_SQRT: return "sqrt";
00054 case OP_GAMMA: return "gamma";
00055 case OP_LGAMMA: return "lgamma";
00056 case OP_ERFC: return "erfc";
00057 case OP_ERF: return "erf";
00058 case OP_EXP: return "exp";
00059 case OP_LOG10: return "log10";
00060 case OP_LN: return "ln";
00061 case OP_SINH: return "sinh";
00062 case OP_COSH: return "cosh";
00063 case OP_TANH: return "tanh";
00064 case OP_SIN: return "sin";
00065 case OP_COS: return "cos";
00066 case OP_TAN: return "tan";
00067 case OP_ATAN: return "atan";
00068 case OP_ASIN: return "asin";
00069 case OP_ACOS: return "acos";
00070 case OP_ABS: return "abs";
00071 case OP_ROUND: return "round";
00072 case OP_XINDEX: return "index";
00073 case OP_DT_SUM_SAME_X: return "sum_same_x";
00074 case OP_DT_AVG_SAME_X: return "avg_same_x";
00075 case OP_DT_SHIRLEY_BG: return "shirley_bg";
00076
00077 case OP_MOD: return "mod";
00078 case OP_MIN2: return "min2";
00079 case OP_MAX2: return "max2";
00080 case OP_VOIGT: return "voigt";
00081 case OP_DVOIGT_DX: return "dvoigt_dx";
00082 case OP_DVOIGT_DY: return "dvoigt_dy";
00083 case OP_RANDNORM: return "randnormal";
00084 case OP_RANDU: return "randuniform";
00085
00086 case OP_FUNC: return "%function";
00087 case OP_SUM_F: return "F";
00088 case OP_SUM_Z: return "Z";
00089 default: return "";
00090 }
00091 }
00092
00093 int get_function_narg(int op)
00094 {
00095 switch (op) {
00096
00097 case OP_SQRT:
00098 case OP_GAMMA:
00099 case OP_LGAMMA:
00100 case OP_ERFC:
00101 case OP_ERF:
00102 case OP_EXP:
00103 case OP_LOG10:
00104 case OP_LN:
00105 case OP_SINH:
00106 case OP_COSH:
00107 case OP_TANH:
00108 case OP_SIN:
00109 case OP_COS:
00110 case OP_TAN:
00111 case OP_ATAN:
00112 case OP_ASIN:
00113 case OP_ACOS:
00114 case OP_ABS:
00115 case OP_ROUND:
00116 case OP_XINDEX:
00117 case OP_DT_SUM_SAME_X:
00118 case OP_DT_AVG_SAME_X:
00119 case OP_DT_SHIRLEY_BG:
00120 return 1;
00121
00122 case OP_MOD:
00123 case OP_MIN2:
00124 case OP_MAX2:
00125 case OP_VOIGT:
00126 case OP_DVOIGT_DX:
00127 case OP_DVOIGT_DY:
00128 case OP_RANDNORM:
00129 case OP_RANDU:
00130 return 2;
00131
00132 case OP_FUNC:
00133 case OP_SUM_F:
00134 case OP_SUM_Z:
00135 return 1;
00136
00137 case OP_NUMAREA:
00138 case OP_FINDX:
00139 return 3;
00140 case OP_FIND_EXTR:
00141 return 2;
00142 default:
00143 return 0;
00144 }
00145 }
00146
00147 bool is_function(int op)
00148 {
00149 return (bool) get_function_narg(op);
00150 }
00151
00152 bool is_array_var(int op)
00153 {
00154 switch (op) {
00155 case OP_Px:
00156 case OP_Py:
00157 case OP_Ps:
00158 case OP_Pa:
00159 case OP_PX:
00160 case OP_PY:
00161 case OP_PS:
00162 case OP_PA:
00163 return true;
00164 default:
00165 return false;
00166 }
00167 }
00168
00169
00170 class AggregSum : public AggregFunc
00171 {
00172 protected:
00173 virtual void op(double x, int)
00174 {
00175 v_ += x;
00176 }
00177 };
00178
00179 class AggregCount : public AggregFunc
00180 {
00181 protected:
00182 virtual void op(double x, int)
00183 {
00184 if (fabs(x) >= 0.5)
00185 v_ += 1;
00186 }
00187 };
00188
00189 class AggregMin : public AggregFunc
00190 {
00191 protected:
00192 virtual void op(double x, int)
00193 {
00194 if (counter_ == 1 || x < v_)
00195 v_ = x;
00196 }
00197 };
00198
00199 class AggregMax : public AggregFunc
00200 {
00201 protected:
00202 virtual void op(double x, int)
00203 {
00204 if (counter_ == 1 || x > v_)
00205 v_ = x;
00206 }
00207 };
00208
00209 class AggregArgMin : public AggregFunc
00210 {
00211 public:
00212 AggregArgMin(const vector<Point>& points) : points_(points) {}
00213 protected:
00214 virtual void op(double x, int n)
00215 {
00216 if (counter_ == 1 || x < min_) {
00217 min_ = x;
00218 v_ = points_[n].x;
00219 }
00220 }
00221 private:
00222 double min_;
00223 const vector<Point>& points_;
00224 };
00225
00226 class AggregArgMax : public AggregFunc
00227 {
00228 public:
00229 AggregArgMax(const vector<Point>& points) : points_(points) {}
00230 protected:
00231 virtual void op(double x, int n)
00232 {
00233 if (counter_ == 1 || x > max_) {
00234 max_ = x;
00235 v_ = points_[n].x;
00236 }
00237 }
00238 private:
00239 double max_;
00240 const vector<Point>& points_;
00241 };
00242
00243 class AggregDArea : public AggregFunc
00244 {
00245 public:
00246 AggregDArea(const vector<Point>& points) : points_(points) {}
00247 protected:
00248 virtual void op(double x, int n)
00249 {
00250 int M = points_.size();
00251 double dx = (points_[min(n+1, M-1)].x - points_[max(n-1, 0)].x) / 2.;
00252 v_ += x * dx;
00253 }
00254 private:
00255 const vector<Point>& points_;
00256 };
00257
00258 class AggregAvg : public AggregFunc
00259 {
00260 protected:
00261 virtual void op(double x, int)
00262 {
00263 v_ += (x - v_) / counter_;
00264 }
00265 };
00266
00267 class AggregStdDev : public AggregFunc
00268 {
00269 public:
00270 AggregStdDev() : mean_(0.) {}
00271 protected:
00272 double mean_;
00273
00274 virtual void op(double x, int)
00275 {
00276
00277 double delta = x - mean_;
00278 mean_ += delta / counter_;
00279 v_ += delta * (x - mean_);
00280 }
00281
00282 virtual double value() const { return sqrt(v_ / (counter_ - 1)); }
00283 };
00284
00285 }
00286
00287
00288 void ExpressionParser::pop_onto_que()
00289 {
00290 int op = opstack_.back();
00291 opstack_.pop_back();
00292 vm_.append_code(op);
00293 }
00294
00295 void ExpressionParser::put_number(double value)
00296 {
00297 if (expected_ == kOperator) {
00298 finished_ = true;
00299 return;
00300 }
00301
00302 vm_.append_number(value);
00303 expected_ = kOperator;
00304 }
00305
00306 void ExpressionParser::put_unary_op(Op op)
00307 {
00308 if (expected_ == kOperator) {
00309 finished_ = true;
00310 return;
00311 }
00312 opstack_.push_back(op);
00313 expected_ = kValue;
00314 }
00315
00316 void ExpressionParser::put_binary_op(Op op)
00317 {
00318 if (expected_ != kOperator) {
00319 finished_ = true;
00320 return;
00321 }
00322
00323 int pri = get_op_priority(op);
00324 while (!opstack_.empty() && get_op_priority(opstack_.back()) >= pri)
00325 pop_onto_que();
00326 opstack_.push_back(op);
00327 expected_ = kValue;
00328 }
00329
00330 void ExpressionParser::put_function(Op op)
00331 {
00332
00333 opstack_.push_back(0);
00334 opstack_.push_back(op);
00335 expected_ = kValue;
00336 }
00337
00338 void ExpressionParser::put_ag_function(Lexer& lex, int ds, AggregFunc& ag)
00339 {
00340
00341 lex.get_expected_token(kTokenOpen);
00342 ExpressionParser ep(F_);
00343 ep.parse_expr(lex, ds);
00344 const vector<Point>& points = F_->get_data(ds)->points();
00345 Token t = lex.get_expected_token(kTokenClose, "if");
00346 if (t.type == kTokenClose) {
00347 for (size_t n = 0; n != points.size(); ++n) {
00348 double x = ep.calculate(n, points);
00349 ag.put(x, n);
00350 }
00351 }
00352 else {
00353 ExpressionParser cond_p(F_);
00354 cond_p.parse_expr(lex, ds);
00355 lex.get_expected_token(kTokenClose);
00356 for (size_t n = 0; n != points.size(); ++n) {
00357 double c = cond_p.calculate(n, points);
00358 if (fabs(c) >= 0.5) {
00359 double x = ep.calculate(n, points);
00360 ag.put(x, n);
00361 }
00362 }
00363 }
00364 put_number(ag.value());
00365 }
00366
00367 void ExpressionParser::put_value_from_curly(Lexer& lex, int ds)
00368 {
00369 ExpressionParser ep(F_);
00370 ep.parse_expr(lex, ds);
00371 lex.get_expected_token(kTokenRCurly);
00372 double x = ep.calculate(0, F_->get_data(ds)->points());
00373 put_number(x);
00374 }
00375
00376 void ExpressionParser::put_array_var(bool has_index, Op op)
00377 {
00378 if (has_index) {
00379 opstack_.push_back(op);
00380 expected_ = kIndex;
00381 }
00382 else {
00383 vm_.append_code(OP_Pn);
00384 vm_.append_code(op);
00385 expected_ = kOperator;
00386 }
00387 }
00388
00389 void ExpressionParser::put_variable_sth(Lexer& lex, const string& name,
00390 bool ast_mode)
00391 {
00392 if (F_ == NULL)
00393 lex.throw_syntax_error("$variables can not be used here");
00394 const Variable *v = F_->find_variable(name);
00395 if (lex.peek_token().type == kTokenDot) {
00396 lex.get_token();
00397 lex.get_expected_token("error");
00398 double e = F_->get_fit_container()->get_standard_error(v);
00399 if (e == -1.)
00400 lex.throw_syntax_error("unknown error of $" + v->name
00401 + "; it is not simple variable");
00402 put_number(e);
00403 }
00404 else {
00405 if (ast_mode) {
00406 int n = F_->find_variable_nr(name);
00407 vm_.append_code(OP_SYMBOL);
00408 vm_.append_code(n);
00409 expected_ = kOperator;
00410 }
00411 else
00412 put_number(v->get_value());
00413 }
00414 }
00415
00416 void ExpressionParser::put_func_sth(Lexer& lex, const string& name,
00417 bool ast_mode)
00418 {
00419 if (F_ == NULL)
00420 lex.throw_syntax_error("%functions can not be used here");
00421 if (lex.peek_token().type == kTokenOpen) {
00422 int n = F_->find_function_nr(name);
00423 if (n == -1)
00424 throw ExecuteError("undefined function: %" + name);
00425
00426 opstack_.push_back(n);
00427 put_function(OP_FUNC);
00428 }
00429 else if (lex.peek_token().type == kTokenDot) {
00430 lex.get_token();
00431 Token arg = lex.get_expected_token(kTokenLname, kTokenCname);
00432 string word = arg.as_string();
00433 if (arg.type == kTokenCname) {
00434 const Function *f = F_->find_function(name);
00435 double val = f->get_param_value(word);
00436 put_number(val);
00437 }
00438 else if (lex.peek_token().type == kTokenOpen) {
00439 int n = F_->find_function_nr(name);
00440 if (n == -1)
00441 throw ExecuteError("undefined function: %" + name);
00442
00443 opstack_.push_back(n);
00444 opstack_.push_back(OP_FUNC);
00445 if (word == "numarea")
00446 put_function(OP_NUMAREA);
00447 else if (word == "findx")
00448 put_function(OP_FINDX);
00449 else if (word == "extremum")
00450 put_function(OP_FIND_EXTR);
00451 else
00452 lex.throw_syntax_error("unknown method of F/Z");
00453 }
00454 else {
00455 const Function *f = F_->find_function(name);
00456 string v = f->get_var_name(f->get_param_nr(word));
00457 put_variable_sth(lex, v, ast_mode);
00458 }
00459 }
00460 else
00461 lex.throw_syntax_error("expected '.' or '(' after %function");
00462 }
00463
00464 void ExpressionParser::put_fz_sth(Lexer& lex, char fz, int ds, bool ast_mode)
00465 {
00466 if (F_ == NULL || ds < 0)
00467 lex.throw_syntax_error("F/Z can not be used here");
00468 if (lex.peek_token().type == kTokenLSquare) {
00469 lex.get_token();
00470 ExpressionParser ep(F_);
00471 ep.parse_expr(lex, ds);
00472 lex.get_expected_token(kTokenRSquare);
00473 int idx = iround(ep.calculate());
00474 const string& name = F_->get_model(ds)->get_func_name(fz, idx);
00475 put_func_sth(lex, name, ast_mode);
00476 }
00477 else if (lex.peek_token().type == kTokenOpen) {
00478 opstack_.push_back(ds);
00479 put_function(fz == 'F' ? OP_SUM_F : OP_SUM_Z);
00480 }
00481 else if (lex.peek_token().type == kTokenDot) {
00482 lex.get_token();
00483 string word = lex.get_expected_token(kTokenLname).as_string();
00484 if (lex.peek_token().type != kTokenOpen)
00485 lex.throw_syntax_error("F/Z has no .properties, only .methods()");
00486
00487 opstack_.push_back(ds);
00488 opstack_.push_back(fz == 'F' ? OP_SUM_F : OP_SUM_Z);
00489 if (word == "numarea")
00490 put_function(OP_NUMAREA);
00491 else if (word == "findx")
00492 put_function(OP_FINDX);
00493 else if (word == "extremum")
00494 put_function(OP_FIND_EXTR);
00495 else
00496 lex.throw_syntax_error("unknown method of F/Z");
00497 }
00498 else {
00499 lex.throw_syntax_error("unexpected token after F/Z");
00500 }
00501 }
00502
00503 void ExpressionParser::put_name(Lexer& lex,
00504 const string& word,
00505 const vector<string>* custom_vars,
00506 vector<string>* new_vars,
00507 bool ast_mode)
00508 {
00509 if (word == "pi") {
00510 put_number(M_PI);
00511 return;
00512 }
00513 if (word == "true") {
00514 put_number(1.);
00515 return;
00516 }
00517 if (word == "false") {
00518 put_number(0.);
00519 return;
00520 }
00521
00522 if (ast_mode && word == "x") {
00523 vm_.append_code(OP_X);
00524 expected_ = kOperator;
00525 return;
00526 }
00527
00528 if (custom_vars != NULL) {
00529 int idx = index_of_element(*custom_vars, word);
00530 if (idx != -1) {
00531 vm_.append_code(OP_SYMBOL);
00532 vm_.append_code(idx);
00533 expected_ = kOperator;
00534 return;
00535 }
00536 }
00537
00538 if (new_vars != NULL) {
00539 int idx = index_of_element(*new_vars, word);
00540 if (idx == -1) {
00541 idx = new_vars->size();
00542 new_vars->push_back(word);
00543 }
00544 vm_.append_code(OP_SYMBOL);
00545
00546 int cv_len = custom_vars != NULL ? (int) custom_vars->size() : 0;
00547 vm_.append_code(cv_len + idx);
00548 expected_ = kOperator;
00549 return;
00550 }
00551
00552
00553 if (custom_vars == NULL && new_vars == NULL && !ast_mode) {
00554 bool has_index = (lex.peek_token().type == kTokenLSquare);
00555 if (word.size() == 1 && (word[0] == 'x' || word[0] == 'y' ||
00556 word[0] == 's' || word[0] == 'a' || word[0] == 'n')) {
00557 if (word[0] == 'x')
00558 put_array_var(has_index, OP_Px);
00559 else if (word[0] == 'y')
00560 put_array_var(has_index, OP_Py);
00561 else if (word[0] == 's')
00562 put_array_var(has_index, OP_Ps);
00563 else if (word[0] == 'a')
00564 put_array_var(has_index, OP_Pa);
00565 else if (word[0] == 'n') {
00566 vm_.append_code(OP_Pn);
00567 expected_ = kOperator;
00568 }
00569 return;
00570 }
00571 }
00572
00573 lex.throw_syntax_error("unknown name: " + word);
00574 }
00575
00576 void ExpressionParser::pop_until_bracket()
00577 {
00578 while (!opstack_.empty()) {
00579 int op = opstack_.back();
00580 if (op == OP_OPEN_ROUND || op == OP_OPEN_SQUARE || op == OP_TERNARY_MID)
00581 break;
00582 opstack_.pop_back();
00583 vm_.append_code(op);
00584 }
00585 }
00586
00587 bool ExpressionParser::parse_full(Lexer& lex, int default_ds,
00588 const vector<string> *custom_vars)
00589 {
00590 try {
00591 parse_expr(lex, default_ds, custom_vars);
00592 }
00593 catch (...) {
00594 return false;
00595 }
00596 return lex.peek_token().type == kTokenNop;
00597 }
00598
00599
00600 void ExpressionParser::parse_expr(Lexer& lex, int default_ds,
00601 const vector<string> *custom_vars,
00602 vector<string> *new_vars,
00603 ParseMode mode)
00604 {
00605 opstack_.clear();
00606 finished_ = false;
00607 expected_ = kValue;
00608 if (F_ != NULL && default_ds >= F_->get_dm_count())
00609 lex.throw_syntax_error("wrong dataset index");
00610 while (!finished_) {
00611 const Token token = lex.get_token();
00612
00613 switch (token.type) {
00614 case kTokenNumber:
00615 put_number(token.value.d);
00616 break;
00617 case kTokenLname: {
00618 string word = token.as_string();
00619
00620 if (word == "not")
00621 put_unary_op(OP_NOT);
00622 else if (word == "and") {
00623 put_binary_op(OP_AFTER_AND);
00624 vm_.append_code(OP_AND);
00625 }
00626 else if (word == "or") {
00627 put_binary_op(OP_AFTER_OR);
00628 vm_.append_code(OP_OR);
00629 }
00630 else if (word == "if") {
00631 pop_until_bracket();
00632 if (expected_ == kOperator && opstack_.empty())
00633 finished_ = true;
00634 else
00635 lex.throw_syntax_error("unexpected `if'");
00636 }
00637 else if (lex.peek_token().type == kTokenOpen) {
00638 if (expected_ == kOperator) {
00639 finished_ = true;
00640 break;
00641 }
00642
00643 if (word == "sqrt")
00644 put_function(OP_SQRT);
00645 else if (word == "gamma")
00646 put_function(OP_GAMMA);
00647 else if (word == "lgamma")
00648 put_function(OP_LGAMMA);
00649 else if (word == "erfc")
00650 put_function(OP_ERFC);
00651 else if (word == "erf")
00652 put_function(OP_ERF);
00653 else if (word == "exp")
00654 put_function(OP_EXP);
00655 else if (word == "log10")
00656 put_function(OP_LOG10);
00657 else if (word == "ln")
00658 put_function(OP_LN);
00659 else if (word == "sinh")
00660 put_function(OP_SINH);
00661 else if (word == "cosh")
00662 put_function(OP_COSH);
00663 else if (word == "tanh")
00664 put_function(OP_TANH);
00665 else if (word == "sin")
00666 put_function(OP_SIN);
00667 else if (word == "cos")
00668 put_function(OP_COS);
00669 else if (word == "tan")
00670 put_function(OP_TAN);
00671 else if (word == "atan")
00672 put_function(OP_ATAN);
00673 else if (word == "asin")
00674 put_function(OP_ASIN);
00675 else if (word == "acos")
00676 put_function(OP_ACOS);
00677 else if (word == "abs")
00678 put_function(OP_ABS);
00679 else if (word == "round")
00680 put_function(OP_ROUND);
00681 else if (word == "index")
00682 put_function(OP_XINDEX);
00683
00684 else if (word == "mod")
00685 put_function(OP_MOD);
00686 else if (word == "min2")
00687 put_function(OP_MIN2);
00688 else if (word == "max2")
00689 put_function(OP_MAX2);
00690 else if (word == "voigt")
00691 put_function(OP_VOIGT);
00692 else if (word == "randnormal")
00693 put_function(OP_RANDNORM);
00694 else if (word == "randuniform")
00695 put_function(OP_RANDU);
00696
00697 else if (word == "sum") {
00698 AggregSum ag;
00699 put_ag_function(lex, default_ds, ag);
00700 }
00701 else if (word == "count") {
00702 AggregCount ag;
00703 put_ag_function(lex, default_ds, ag);
00704 }
00705 else if (word == "min") {
00706 AggregMin ag;
00707 put_ag_function(lex, default_ds, ag);
00708 }
00709 else if (word == "max") {
00710 AggregMax ag;
00711 put_ag_function(lex, default_ds, ag);
00712 }
00713 else if (word == "argmin") {
00714 AggregArgMin ag(F_->get_data(default_ds)->points());
00715 put_ag_function(lex, default_ds, ag);
00716 }
00717 else if (word == "argmax") {
00718 AggregArgMax ag(F_->get_data(default_ds)->points());
00719 put_ag_function(lex, default_ds, ag);
00720 }
00721 else if (word == "avg") {
00722 AggregAvg ag;
00723 put_ag_function(lex, default_ds, ag);
00724 }
00725 else if (word == "stddev") {
00726 AggregStdDev ag;
00727 put_ag_function(lex, default_ds, ag);
00728 }
00729 else if (word == "darea") {
00730 if (F_ == NULL)
00731 lex.throw_syntax_error("darea: unknown @dataset");
00732 AggregDArea ag(F_->get_data(default_ds)->points());
00733 put_ag_function(lex, default_ds, ag);
00734 }
00735
00736
00737 else if (mode == kDatasetTrMode && word == "sum_same_x")
00738 put_function(OP_DT_SUM_SAME_X);
00739 else if (mode == kDatasetTrMode && word == "avg_same_x")
00740 put_function(OP_DT_AVG_SAME_X);
00741 else if (mode == kDatasetTrMode && word == "shirley_bg")
00742 put_function(OP_DT_SHIRLEY_BG);
00743
00744 else
00745 lex.throw_syntax_error("unknown function: " + word);
00746 }
00747 else {
00748 if (expected_ == kOperator) {
00749 finished_ = true;
00750 break;
00751 }
00752 put_name(lex, word, custom_vars, new_vars, mode==kAstMode);
00753 }
00754 break;
00755 }
00756 case kTokenUletter: {
00757 if (expected_ == kOperator) {
00758 finished_ = true;
00759 break;
00760 }
00761 bool has_index = (lex.peek_token().type == kTokenLSquare);
00762 if (*token.str == 'X')
00763 put_array_var(has_index, OP_PX);
00764 else if (*token.str == 'Y')
00765 put_array_var(has_index, OP_PY);
00766 else if (*token.str == 'S')
00767 put_array_var(has_index, OP_PS);
00768 else if (*token.str == 'A')
00769 put_array_var(has_index, OP_PA);
00770 else if (*token.str == 'M') {
00771 vm_.append_code(OP_PM);
00772 expected_ = kOperator;
00773 }
00774 else if (*token.str == 'F' || *token.str == 'Z') {
00775 put_fz_sth(lex, *token.str, default_ds, mode==kAstMode);
00776 }
00777 else
00778 lex.throw_syntax_error("unknown name: "+ token.as_string());
00779 break;
00780 }
00781 case kTokenDataset: {
00782 if (expected_ == kOperator) {
00783 finished_ = true;
00784 break;
00785 }
00786 if (lex.peek_token().type == kTokenDot) {
00787 lex.get_token();
00788 Token t = lex.get_expected_token(kTokenUletter);
00789 if (*t.str == 'F' || *t.str == 'Z') {
00790 put_fz_sth(lex, *t.str, token.value.i, mode==kAstMode);
00791 }
00792 else
00793 lex.throw_syntax_error("unknown name: " +
00794 token.as_string());
00795 }
00796 else {
00797 if (mode != kDatasetTrMode)
00798 lex.get_expected_token(kTokenDot);
00799 int n = token.value.i;
00800 if (n == Lexer::kAll || n == Lexer::kNew)
00801 lex.throw_syntax_error("@*/@+ not allowed at RHS");
00802 vm_.append_code(OP_DATASET);
00803 vm_.append_code(n);
00804 expected_ = kOperator;
00805 }
00806 break;
00807 }
00808 case kTokenOpen:
00809 if (expected_ == kOperator) {
00810 finished_ = true;
00811 break;
00812 }
00813 opstack_.push_back(OP_OPEN_ROUND);
00814 expected_ = kValue;
00815 break;
00816 case kTokenLSquare:
00817 if (expected_ != kIndex) {
00818 finished_ = true;
00819 break;
00820 }
00821 opstack_.push_back(OP_OPEN_SQUARE);
00822 expected_ = kValue;
00823 break;
00824
00825 case kTokenClose:
00826 pop_until_bracket();
00827 if (opstack_.empty()) {
00828 finished_ = true;
00829 break;
00830 }
00831 else if (opstack_.back() == OP_OPEN_SQUARE)
00832 lex.throw_syntax_error("mismatching '[' and ')'");
00833 else if (opstack_.back() == OP_TERNARY_MID)
00834 lex.throw_syntax_error("mismatching '?' and ')'");
00835
00836 opstack_.pop_back();
00837
00838
00839 if (!opstack_.empty()) {
00840 int top = opstack_.back();
00841 if (is_function(top)) {
00842 pop_onto_que();
00843 int n = opstack_.back() + 1;
00844 opstack_.pop_back();
00845 int expected_n = get_function_narg(top);
00846 if (n != expected_n)
00847 lex.throw_syntax_error(
00848 S("function ") + function_name(top) + " expects "
00849 + S(expected_n) + " arguments, not " + S(n));
00850 if (top==OP_FUNC || top==OP_SUM_F || top==OP_SUM_Z)
00851 pop_onto_que();
00852 else if (top==OP_NUMAREA || top==OP_FINDX ||
00853 top==OP_FIND_EXTR) {
00854 pop_onto_que();
00855 pop_onto_que();
00856 }
00857 }
00858 }
00859
00860 expected_ = kOperator;
00861 break;
00862
00863 case kTokenComma:
00864 pop_until_bracket();
00865 if (opstack_.empty()) {
00866 finished_ = true;
00867 break;
00868 }
00869 else if (opstack_.back() == OP_OPEN_SQUARE)
00870 lex.throw_syntax_error("unexpected ',' after '['");
00871 else if (opstack_.back() == OP_TERNARY_MID)
00872 lex.throw_syntax_error("unexpected ',' after '?'");
00873
00874 else if (opstack_.size() < 3 ||
00875 !is_function(*(opstack_.end() - 2)))
00876 lex.throw_syntax_error("',' outside of function");
00877 else
00878
00879 ++ *(opstack_.end() - 3);
00880 expected_ = kValue;
00881 break;
00882
00883 case kTokenRSquare:
00884 pop_until_bracket();
00885 if (opstack_.empty()) {
00886 finished_ = true;
00887 break;
00888 }
00889 else if (opstack_.back() == OP_OPEN_ROUND)
00890 lex.throw_syntax_error("mismatching '(' and ']'");
00891 else if (opstack_.back() == OP_TERNARY_MID)
00892 lex.throw_syntax_error("mismatching '?' and ']'");
00893
00894 opstack_.pop_back();
00895 if (opstack_.empty() || !is_array_var(opstack_.back()))
00896 lex.throw_syntax_error("[index] can be used only after "
00897 "x, y, s, a, X, Y, S or A.");
00898 pop_onto_que();
00899 expected_ = kOperator;
00900 break;
00901
00902 case kTokenNop:
00903 finished_ = true;
00904 break;
00905 case kTokenPower:
00906 put_binary_op(OP_POW);
00907 break;
00908 case kTokenMult:
00909 put_binary_op(OP_MUL);
00910 break;
00911 case kTokenDiv:
00912 put_binary_op(OP_DIV);
00913 break;
00914 case kTokenPlus:
00915 if (expected_ == kOperator)
00916 put_binary_op(OP_ADD);
00917 else
00918 {}
00919 break;
00920 case kTokenMinus:
00921 if (expected_ == kOperator)
00922 put_binary_op(OP_SUB);
00923 else if (lex.peek_token().type == kTokenNumber) {
00924
00925
00926
00927
00928 Token num = lex.get_token();
00929 if (lex.peek_token().type != kTokenPower) {
00930 put_number(-num.value.d);
00931 }
00932 else {
00933 put_unary_op(OP_NEG);
00934 put_number(num.value.d);
00935 }
00936 }
00937 else
00938 put_unary_op(OP_NEG);
00939 break;
00940 case kTokenGT:
00941
00942
00943
00944 if (lex.peek_token().type == kTokenString)
00945 finished_ = true;
00946 else
00947 put_binary_op(OP_GT);
00948 break;
00949 case kTokenGE:
00950 put_binary_op(OP_GE);
00951 break;
00952 case kTokenLT:
00953 put_binary_op(OP_LT);
00954 break;
00955 case kTokenLE:
00956 put_binary_op(OP_LE);
00957 break;
00958 case kTokenEQ:
00959 put_binary_op(OP_EQ);
00960 break;
00961 case kTokenNE:
00962 put_binary_op(OP_NEQ);
00963 break;
00964 case kTokenQMark:
00965
00966
00967 if (lex.peek_token().type == kTokenCname) {
00968 finished_ = true;
00969 break;
00970 }
00971 put_binary_op(OP_TERNARY_MID);
00972 vm_.append_code(OP_TERNARY);
00973 break;
00974 case kTokenColon:
00975 for (;;) {
00976 if (opstack_.empty()) {
00977 finished_ = true;
00978 break;
00979 }
00980
00981 int op = opstack_.back();
00982 opstack_.pop_back();
00983 vm_.append_code(op);
00984 if (op == OP_TERNARY_MID)
00985 break;
00986 }
00987 if (!finished_)
00988 put_binary_op(OP_AFTER_TERNARY);
00989 break;
00990 case kTokenVarname:
00991 put_variable_sth(lex, Lexer::get_string(token), mode==kAstMode);
00992 break;
00993 case kTokenFuncname:
00994 put_func_sth(lex, Lexer::get_string(token), mode==kAstMode);
00995 break;
00996
00997 case kTokenTilde:
00998 if (expected_ == kOperator)
00999 lex.throw_syntax_error("unexpected `~'");
01000 vm_.append_code(OP_TILDE);
01001 break;
01002
01003 case kTokenLCurly:
01004 put_value_from_curly(lex, default_ds);
01005 break;
01006
01007 case kTokenString:
01008 case kTokenCname:
01009 case kTokenBang:
01010 case kTokenAppend:
01011 case kTokenAddAssign:
01012 case kTokenSubAssign:
01013 case kTokenDots:
01014 case kTokenPlusMinus:
01015 case kTokenRCurly:
01016 case kTokenAssign:
01017 case kTokenSemicolon:
01018 case kTokenDot:
01019 finished_ = true;
01020 break;
01021
01022
01023 case kTokenFilename:
01024 case kTokenExpr:
01025 case kTokenEVar:
01026 case kTokenRest:
01027 assert(0);
01028 break;
01029 }
01030
01031 if (finished_ && token.type != kTokenNop)
01032 lex.go_back(token);
01033 }
01034
01035 if (expected_ != kOperator)
01036 lex.throw_syntax_error("unexpected token or end of expression");
01037
01038
01039 pop_until_bracket();
01040 if (!opstack_.empty())
01041 lex.throw_syntax_error("mismatching bracket");
01042 }
01043
01044 void ExpressionParser::push_assign_lhs(const Token& t)
01045 {
01046 Op op;
01047 switch (toupper(*t.str)) {
01048 case 'X': op = OP_ASSIGN_X; break;
01049 case 'Y': op = OP_ASSIGN_Y; break;
01050 case 'S': op = OP_ASSIGN_S; break;
01051 case 'A': op = OP_ASSIGN_A; break;
01052 default: assert(0);
01053 }
01054 vm_.append_code(op);
01055 }
01056