00001
00002
00003
00004 #include <algorithm>
00005 #include <math.h>
00006 #include <string.h>
00007 #include <boost/math/distributions/students_t.hpp>
00008
00009 #include "fit.h"
00010 #include "logic.h"
00011 #include "model.h"
00012 #include "data.h"
00013 #include "ui.h"
00014 #include "numfuncs.h"
00015 #include "settings.h"
00016 #include "var.h"
00017 #include "LMfit.h"
00018 #include "GAfit.h"
00019 #include "NMfit.h"
00020
00021 using namespace std;
00022
00023 Fit::Fit(Ftk *F, const string& m)
00024 : name(m), F_(F),
00025 evaluations_(0), iter_nr_(0), na_(0), last_refresh_time_(0)
00026 {
00027 }
00028
00029
00030 int Fit::get_dof(const vector<DataAndModel*>& dms)
00031 {
00032 update_parameters(dms);
00033 int dof = 0;
00034 v_foreach (DataAndModel*, i, dms)
00035 dof += (*i)->data()->get_n();
00036 dof -= count(par_usage_.begin(), par_usage_.end(), true);
00037 return dof;
00038 }
00039
00040 string Fit::get_goodness_info(const vector<DataAndModel*>& dms)
00041 {
00042 const SettingsMgr *sm = F_->settings_mgr();
00043 const vector<realt>& pp = F_->parameters();
00044 int dof = get_dof(dms);
00045
00046 realt wssr = do_compute_wssr(pp, dms, true);
00047 return "WSSR=" + sm->format_double(wssr)
00048 + " DoF=" + S(dof)
00049 + " WSSR/DoF=" + sm->format_double(wssr/dof)
00050 + " SSR=" + sm->format_double(do_compute_wssr(pp, dms, false))
00051 + " R2=" + sm->format_double(compute_r_squared(pp, dms));
00052 }
00053
00054 vector<realt> Fit::get_covariance_matrix(const vector<DataAndModel*>& dms)
00055 {
00056 const vector<realt> &pp = F_->parameters();
00057 update_parameters(dms);
00058
00059 vector<realt> alpha(na_*na_), beta(na_);
00060 compute_derivatives(pp, dms, alpha, beta);
00061
00062
00063
00064 for (int i = 0; i < na_; ++i)
00065 if (!par_usage_[i]) {
00066 alpha[i*na_ + i] = 1.;
00067 }
00068
00069
00070
00071
00072 vector<int> undef;
00073 for (int i = 0; i < na_; ++i) {
00074 bool has_nonzero = false;
00075 for (int j = 0; j < na_; j++)
00076 if (alpha[na_*i+j] != 0.) {
00077 has_nonzero = true;
00078 break;
00079 }
00080 if (!has_nonzero) {
00081 undef.push_back(i);
00082 alpha[i*na_ + i] = 1.;
00083 }
00084 }
00085
00086 reverse_matrix(alpha, na_);
00087
00088 v_foreach (int, i, undef)
00089 alpha[(*i)*na_ + (*i)] = 0.;
00090
00091 return alpha;
00092 }
00093
00094 vector<realt> Fit::get_standard_errors(const vector<DataAndModel*>& dms)
00095 {
00096 const vector<realt> &pp = F_->parameters();
00097 realt wssr = do_compute_wssr(pp, dms, true);
00098 int dof = get_dof(dms);
00099 vector<realt> alpha = get_covariance_matrix(dms);
00100
00101 vector<realt> errors(na_);
00102 for (int i = 0; i < na_; ++i)
00103 errors[i] = sqrt(wssr / dof * alpha[i*na_ + i]);
00104 return errors;
00105 }
00106
00107 vector<realt> Fit::get_confidence_limits(const vector<DataAndModel*>& dms,
00108 double level_percent)
00109 {
00110 vector<realt> v = get_standard_errors(dms);
00111 int dof = get_dof(dms);
00112 double level = 1. - level_percent / 100.;
00113 boost::math::students_t dist(dof);
00114 double t = boost::math::quantile(boost::math::complement(dist, level/2));
00115 vm_foreach (realt, i, v)
00116 *i *= t;
00117 return v;
00118 }
00119
00120 string Fit::get_cov_info(const vector<DataAndModel*>& dms)
00121 {
00122 string s;
00123 const SettingsMgr *sm = F_->settings_mgr();
00124 vector<realt> alpha = get_covariance_matrix(dms);
00125 s += "\nCovariance matrix\n ";
00126 for (int i = 0; i < na_; ++i)
00127 if (par_usage_[i])
00128 s += "\t$" + F_->find_variable_handling_param(i)->name;
00129 for (int i = 0; i < na_; ++i) {
00130 if (par_usage_[i]) {
00131 s += "\n$" + F_->find_variable_handling_param(i)->name;
00132 for (int j = 0; j < na_; ++j) {
00133 if (par_usage_[j])
00134 s += "\t" + sm->format_double(alpha[na_*i + j]);
00135 }
00136 }
00137 }
00138 return s;
00139 }
00140
00141 realt Fit::do_compute_wssr(const vector<realt> &A,
00142 const vector<DataAndModel*>& dms,
00143 bool weigthed)
00144 {
00145 realt wssr = 0;
00146 F_->use_external_parameters(A);
00147 v_foreach (DataAndModel*, i, dms) {
00148 wssr += compute_wssr_for_data(*i, weigthed);
00149 }
00150 return wssr;
00151 }
00152
00153
00154 realt Fit::compute_wssr_for_data(const DataAndModel* dm, bool weigthed)
00155 {
00156 const Data* data = dm->data();
00157 int n = data->get_n();
00158 vector<realt> xx(n);
00159 for (int j = 0; j < n; j++)
00160 xx[j] = data->get_x(j);
00161 vector<realt> yy(n, 0.);
00162 dm->model()->compute_model(xx, yy);
00163
00164
00165 long double wssr = 0;
00166 for (int j = 0; j < n; j++) {
00167 realt dy = data->get_y(j) - yy[j];
00168 if (weigthed)
00169 dy /= data->get_sigma(j);
00170 wssr += dy * dy;
00171 }
00172 return wssr;
00173 }
00174
00175
00176 realt Fit::compute_r_squared(const vector<realt> &A,
00177 const vector<DataAndModel*>& dms)
00178 {
00179 realt sum_err = 0, sum_tot = 0, se = 0, st = 0;
00180 F_->use_external_parameters(A);
00181 v_foreach (DataAndModel*, i, dms) {
00182 compute_r_squared_for_data(*i, &se, &st);
00183 sum_err += se;
00184 sum_tot += st;
00185 }
00186 return 1 - (sum_err / sum_tot);
00187 }
00188
00189
00190 realt Fit::compute_r_squared_for_data(const DataAndModel* dm,
00191 realt* sum_err, realt* sum_tot)
00192 {
00193 const Data* data = dm->data();
00194 int n = data->get_n();
00195 vector<realt> xx(n);
00196 for (int j = 0; j < n; j++)
00197 xx[j] = data->get_x(j);
00198 vector<realt> yy(n, 0.);
00199 dm->model()->compute_model(xx, yy);
00200 realt ysum = 0;
00201 realt ss_err = 0;
00202 for (int j = 0; j < n; j++) {
00203 ysum += data->get_y(j) ;
00204 realt dy = data->get_y(j) - yy[j];
00205 ss_err += dy * dy ;
00206 }
00207 realt mean = ysum / n;
00208
00209 realt ss_tot = 0;
00210 for (int j = 0; j < n; j++) {
00211 realt dy = data->get_y(j) - mean;
00212 ss_tot += dy * dy;
00213 }
00214
00215 if (sum_err != NULL)
00216 *sum_err = ss_err;
00217 if (sum_tot != NULL)
00218 *sum_tot = ss_tot;
00219
00220
00221
00222 return 1 - (ss_err / ss_tot);
00223 }
00224
00225
00226 void Fit::compute_derivatives(const vector<realt> &A,
00227 const vector<DataAndModel*>& dms,
00228 vector<realt>& alpha, vector<realt>& beta)
00229 {
00230 assert (size(A) == na_ && size(alpha) == na_ * na_ && size(beta) == na_);
00231 fill(alpha.begin(), alpha.end(), 0.0);
00232 fill(beta.begin(), beta.end(), 0.0);
00233
00234 F_->use_external_parameters(A);
00235 v_foreach (DataAndModel*, i, dms) {
00236 compute_derivatives_for(*i, alpha, beta);
00237 }
00238
00239 for (int j = 1; j < na_; j++)
00240 for (int k = 0; k < j; k++)
00241 alpha[na_ * k + j] = alpha[na_ * j + k];
00242 }
00243
00244
00245
00246 void Fit::compute_derivatives_for(const DataAndModel* dm,
00247 vector<realt>& alpha, vector<realt>& beta)
00248 {
00249 const Data* data = dm->data();
00250 int n = data->get_n();
00251 vector<realt> xx(n);
00252 for (int j = 0; j < n; ++j)
00253 xx[j] = data->get_x(j);
00254 vector<realt> yy(n, 0.);
00255 const int dyn = na_+1;
00256 vector<realt> dy_da(n*dyn, 0.);
00257 dm->model()->compute_model_with_derivs(xx, yy, dy_da);
00258 for (int i = 0; i != n; i++) {
00259 realt inv_sig = 1.0 / data->get_sigma(i);
00260 realt dy_sig = (data->get_y(i) - yy[i]) * inv_sig;
00261 vector<realt>::iterator t = dy_da.begin() + i*dyn;
00262 for (int j = 0; j != na_; ++j) {
00263 if (par_usage_[j]) {
00264 *(t+j) *= inv_sig;
00265 for (int k = 0; k <= j; ++k)
00266 alpha[na_ * j + k] += *(t+j) * *(t+k);
00267 beta[j] += dy_sig * *(t+j);
00268 }
00269 }
00270 }
00271 }
00272
00273 string Fit::print_matrix(const vector<realt>& vec, int m, int n,
00274 const char *mname)
00275
00276 {
00277 if (F_->get_verbosity() <= 0)
00278 return "";
00279 assert (size(vec) == m * n);
00280 if (m < 1 || n < 1)
00281 throw ExecuteError("In `print_matrix': It is not a matrix.");
00282 string h = S(mname) + "={ ";
00283 if (m == 1) {
00284 for (int i = 0; i < n; i++)
00285 h += S(vec[i]) + (i < n - 1 ? ", " : " }") ;
00286 }
00287 else {
00288 string blanks (strlen(mname) + 1, ' ');
00289 for (int j = 0; j < m; j++){
00290 if (j > 0)
00291 h += blanks + " ";
00292 for (int i = 0; i < n; i++)
00293 h += S(vec[j * n + i]) + ", ";
00294 h += "\n";
00295 }
00296 h += blanks + "}";
00297 }
00298 return h;
00299 }
00300
00301 bool Fit::post_fit(const vector<realt>& aa, realt chi2)
00302 {
00303 double elapsed = (clock() - start_time_) / (double) CLOCKS_PER_SEC;
00304 F_->msg(name + ": " + S(iter_nr_) + " iterations, "
00305 + S(evaluations_) + " evaluations, "
00306 + format1<double,16>("%.2f", elapsed) + " s. of CPU time.");
00307 bool better = (chi2 < wssr_before_);
00308 const SettingsMgr *sm = F_->settings_mgr();
00309 if (better) {
00310 F_->get_fit_container()->push_param_history(aa);
00311 F_->put_new_parameters(aa);
00312 double percent_change = (chi2 - wssr_before_) / wssr_before_ * 100.;
00313 F_->msg("WSSR: " + sm->format_double(chi2) +
00314 " (" + S(percent_change) + "%)");
00315 }
00316 else {
00317 F_->msg("Better fit NOT found (WSSR = " + sm->format_double(chi2)
00318 + ", was " + sm->format_double(wssr_before_) + ")."
00319 "\nParameters NOT changed");
00320 F_->use_external_parameters(a_orig_);
00321 if (F_->get_settings()->fit_replot)
00322 F_->get_ui()->draw_plot(UserInterface::kRepaintImmediately);
00323 }
00324 return better;
00325 }
00326
00327 realt Fit::draw_a_from_distribution (int nr, char distribution, realt mult)
00328 {
00329 assert (nr >= 0 && nr < na_);
00330 if (!par_usage_[nr])
00331 return a_orig_[nr];
00332 realt dv = 0;
00333 switch (distribution) {
00334 case 'g':
00335 dv = rand_gauss();
00336 break;
00337 case 'l':
00338 dv = rand_cauchy();
00339 break;
00340 case 'b':
00341 dv = rand_bool() ? -1 : 1;
00342 break;
00343 default:
00344 dv = rand_1_1();
00345 break;
00346 }
00347 return F_->variation_of_a(nr, dv * mult);
00348 }
00349
00350 class ComputeUI
00351 {
00352 public:
00353 ComputeUI(UserInterface *ui) : ui_(ui) { ui->enable_compute_ui(true); }
00354 ~ComputeUI() { ui_->enable_compute_ui(false); }
00355 private:
00356 UserInterface *ui_;
00357 };
00358
00359
00360 void Fit::fit(int max_iter, const vector<DataAndModel*>& dms)
00361 {
00362 start_time_ = clock();
00363 last_refresh_time_ = time(0);
00364 ComputeUI compute_ui(F_->get_ui());
00365 update_parameters(dms);
00366 dmdm_ = dms;
00367 a_orig_ = F_->parameters();
00368 F_->get_fit_container()->push_param_history(a_orig_);
00369 iter_nr_ = 0;
00370 evaluations_ = 0;
00371 user_interrupt = false;
00372 init();
00373 max_iterations_ = max_iter;
00374
00375
00376 int nu = count(par_usage_.begin(), par_usage_.end(), true);
00377 int np = 0;
00378 v_foreach (DataAndModel*, i, dms)
00379 np += (*i)->data()->get_n();
00380 F_->msg ("Fitting " + S(nu) + " (of " + S(na_) + ") parameters to "
00381 + S(np) + " points ...");
00382
00383 autoiter();
00384 }
00385
00386
00387 void Fit::continue_fit(int max_iter)
00388 {
00389 start_time_ = clock();
00390 last_refresh_time_ = time(0);
00391 v_foreach (DataAndModel*, i, dmdm_)
00392 if (!F_->contains_dm(*i) || na_ != size(F_->parameters()))
00393 throw ExecuteError(name + " method should be initialized first.");
00394 update_parameters(dmdm_);
00395 a_orig_ = F_->parameters();
00396 user_interrupt = false;
00397 evaluations_ = 0;
00398 max_iterations_ = max_iter;
00399 autoiter();
00400 }
00401
00402 void Fit::update_parameters(const vector<DataAndModel*>& dms)
00403 {
00404 if (F_->parameters().empty())
00405 throw ExecuteError("there are no fittable parameters.");
00406 if (dms.empty())
00407 throw ExecuteError("No datasets to fit.");
00408
00409 na_ = F_->parameters().size();
00410
00411 par_usage_ = vector<bool>(na_, false);
00412 for (int idx = 0; idx < na_; ++idx) {
00413 int var_idx = F_->find_nr_var_handling_param(idx);
00414 v_foreach (DataAndModel*, i, dms) {
00415 if ((*i)->model()->is_dependent_on_var(var_idx)) {
00416 par_usage_[idx] = true;
00417 break;
00418 }
00419
00420
00421 }
00422 }
00423 if (count(par_usage_.begin(), par_usage_.end(), true) == 0)
00424 throw ExecuteError("No parametrized functions are used in the model.");
00425 }
00426
00427
00428 bool Fit::common_termination_criteria(int iter)
00429 {
00430 bool stop = false;
00431 if (user_interrupt) {
00432 F_->msg ("Fitting stopped manually.");
00433 stop = true;
00434 }
00435 if (max_iterations_ >= 0 && iter >= max_iterations_) {
00436 F_->msg("Maximum iteration number reached.");
00437 stop = true;
00438 }
00439 int max_eval = F_->get_settings()->max_wssr_evaluations;
00440 if (max_eval > 0 && evaluations_ >= max_eval) {
00441 F_->msg("Maximum evaluations number reached.");
00442 stop = true;
00443 }
00444 double max_time = F_->get_settings()->max_fitting_time;
00445 if (max_time > 0 && clock() >= start_time_ + max_time * CLOCKS_PER_SEC) {
00446 F_->msg("Maximum processor time exceeded.");
00447 stop = true;
00448 }
00449 return stop;
00450 }
00451
00452 void Fit::iteration_plot(const vector<realt> &A, realt wssr)
00453 {
00454 int p = F_->get_settings()->refresh_period;
00455 if (p < 0 || (p > 0 && time(0) - last_refresh_time_ < p))
00456 return;
00457 if (F_->get_settings()->fit_replot) {
00458 F_->use_external_parameters(A);
00459 F_->get_ui()->draw_plot(UserInterface::kRepaintImmediately);
00460 }
00461 double elapsed = (clock() - start_time_) / (double) CLOCKS_PER_SEC;
00462 double percent_change = (wssr - wssr_before_) / wssr_before_ * 100.;
00463 int max_eval = F_->get_settings()->max_wssr_evaluations;
00464 F_->msg("Iter: " + S(iter_nr_) + "/"
00465 + (max_iterations_ > 0 ? S(max_iterations_) : string("oo"))
00466 + " Eval: " + S(evaluations_) + "/"
00467 + (max_eval > 0 ? S(max_eval) : string("oo"))
00468 + " WSSR=" + F_->settings_mgr()->format_double(wssr)
00469 + " (" + S(percent_change)+ "%)"
00470 + " CPU time: " + format1<double,16>("%.2f", elapsed) + "s.");
00471 F_->get_ui()->refresh();
00472 last_refresh_time_ = time(0);
00473 }
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488 void Fit::Jordan(vector<realt>& A, vector<realt>& b, int n)
00489 {
00490 assert (size(A) == n*n && size(b) == n);
00491 for (int i = 0; i < n; i++) {
00492 realt amax = 0;
00493 int maxnr = -1;
00494 for (int j = i; j < n; j++)
00495 if (fabs (A[n*j+i]) > amax) {
00496 maxnr = j;
00497 amax = fabs (A[n * j + i]);
00498 }
00499 if (maxnr == -1) {
00500
00501
00502 for (int j = i; j < n; j++)
00503 if (A[n * i + j] || b[i]) {
00504 F_->vmsg (print_matrix(A, n, n, "A"));
00505 F_->msg (print_matrix(b, 1, n, "b"));
00506 throw ExecuteError("In iteration " + S(iter_nr_)
00507 + ": trying to reverse singular matrix."
00508 " Column " + S(i) + " is zeroed.");
00509 }
00510 continue;
00511 }
00512 if (maxnr != i) {
00513 for (int j = i; j < n; j++)
00514 swap (A[n*maxnr+j], A[n*i+j]);
00515 swap (b[i], b[maxnr]);
00516 }
00517 realt c = 1.0 / A[i*n+i];
00518 for (int j = i; j < n; j++)
00519 A[i*n+j] *= c;
00520 b[i] *= c;
00521 for (int k = 0; k < n; k++)
00522 if (k != i) {
00523 realt d = A[k * n + i];
00524 for (int j = i; j < n; j++)
00525 A[k * n + j] -= A[i * n + j] * d;
00526 b[k] -= b[i] * d;
00527 }
00528 }
00529 }
00530
00531
00532 void Fit::reverse_matrix (vector<realt>&A, int n)
00533 {
00534
00535 assert (size(A) == n*n);
00536 vector<realt> A_result(n*n);
00537 for (int i = 0; i < n; i++) {
00538 vector<realt> A_copy = A;
00539 vector<realt> v(n, 0);
00540 v[i] = 1;
00541 Jordan(A_copy, v, n);
00542 for (int j = 0; j < n; j++)
00543 A_result[j * n + i] = v[j];
00544 }
00545 A = A_result;
00546 }
00547
00548
00549
00550 FitMethodsContainer::FitMethodsContainer(Ftk *F_)
00551 : ParameterHistoryMgr(F_), dirty_error_cache_(true)
00552
00553 {
00554 methods_.push_back(new LMfit(F_));
00555 methods_.push_back(new NMfit(F_));
00556 methods_.push_back(new GAfit(F_));
00557 }
00558
00559 FitMethodsContainer::~FitMethodsContainer()
00560 {
00561 purge_all_elements(methods_);
00562 }
00563
00564 realt FitMethodsContainer::get_standard_error(const Variable* var) const
00565 {
00566 if (!var->is_simple())
00567 return -1.;
00568 if (dirty_error_cache_
00569 || errors_cache_.size() != F_->parameters().size()) {
00570 errors_cache_ = F_->get_fit()->get_standard_errors(F_->get_dms());
00571 }
00572 return errors_cache_[var->get_nr()];
00573 }
00574
00575
00576
00577
00578
00579
00580
00581 void ParameterHistoryMgr::load_param_history(int item_nr, bool relative)
00582 {
00583 if (item_nr == -1 && relative && !param_history_.empty()
00584 && param_history_[param_hist_ptr_].size() == F_->parameters().size()
00585 && param_history_[param_hist_ptr_] != F_->parameters())
00586 item_nr = 0;
00587 if (relative)
00588 item_nr += param_hist_ptr_;
00589 else if (item_nr < 0)
00590 item_nr += param_history_.size();
00591 if (item_nr < 0 || item_nr >= size(param_history_))
00592 throw ExecuteError("There is no parameter history item #"
00593 + S(item_nr) + ".");
00594 F_->put_new_parameters(param_history_[item_nr]);
00595 param_hist_ptr_ = item_nr;
00596 }
00597
00598 bool ParameterHistoryMgr::can_undo() const
00599 {
00600 return !param_history_.empty()
00601 && (param_hist_ptr_ > 0 || param_history_[0] != F_->parameters());
00602 }
00603
00604 bool ParameterHistoryMgr::push_param_history(const vector<realt>& aa)
00605 {
00606 param_hist_ptr_ = param_history_.size() - 1;
00607 if (param_history_.empty() || param_history_.back() != aa) {
00608 param_history_.push_back(aa);
00609 ++param_hist_ptr_;
00610 return true;
00611 }
00612 else
00613 return false;
00614 }
00615
00616
00617 string ParameterHistoryMgr::param_history_info() const
00618 {
00619 string s = "Parameter history contains " + S(param_history_.size())
00620 + " items.";
00621 if (!param_history_.empty())
00622 s += " Now at #" + S(param_hist_ptr_);
00623 return s;
00624 }
00625
00626