00001
00002
00003
00004 #include <stdlib.h>
00005 #include <time.h>
00006 #include <algorithm>
00007 #include <numeric>
00008 #include <deque>
00009 #include <math.h>
00010
00011 #include "common.h"
00012 #include "GAfit.h"
00013 #include "logic.h"
00014 #include "settings.h"
00015 #include "numfuncs.h"
00016
00017
00018 using namespace std;
00019
00020 GAfit::GAfit(Ftk* F)
00021 : Fit(F, "genetic_algorithms"),
00022 popsize (100), elitism(0),
00023 mutation_type('u'), p_mutation(0.1), mutate_all_genes(false),
00024 mutation_strength(0.1), crossover_type('u'), p_crossover(0.3),
00025 selection_type('r'), rank_scoring(false), tournament_size(2),
00026 window_size(-1),
00027 linear_scaling_a(1.), linear_scaling_c(2.), linear_scaling_b (1.),
00028 std_dev_stop(0), iter_with_no_progresss_stop(0),
00029 autoplot_indiv_nr(-1),
00030 pop(0), opop(0),
00031 best_indiv(0)
00032 {
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042 Crossover_enum ['u'] = "uniform";
00043 Crossover_enum ['o'] = "one-point";
00044 Crossover_enum ['t'] = "two-point";
00045 Crossover_enum ['a'] = "arithmetic1";
00046 Crossover_enum ['A'] = "arithmetic2";
00047 Crossover_enum ['g'] = "guaranteed-avg";
00048
00049
00050
00051
00052
00053 Selection_enum ['r'] = "roulette";
00054 Selection_enum ['t'] = "tournament";
00055 Selection_enum ['s'] = "srs";
00056 Selection_enum ['d'] = "ds";
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070 }
00071
00072 GAfit::~GAfit() {}
00073
00074 void GAfit::init()
00075 {
00076 pop = &pop1;
00077 opop = &pop2;
00078 pop->resize (popsize);
00079 vector<Individual>::iterator best = pop->begin();
00080 for (vector<Individual>::iterator i = pop->begin(); i != pop->end(); i++) {
00081 i->g.resize(na_);
00082 for (int j = 0; j < na_; j++)
00083 i->g[j] = draw_a_from_distribution(j);
00084 compute_wssr_for_ind (i);
00085 if (i->raw_score < best->raw_score)
00086 best = i;
00087 }
00088 best_indiv = *best;
00089 }
00090
00091 void GAfit::autoiter()
00092 {
00093 wssr_before_ = compute_wssr(a_orig_, dmdm_);
00094 F_->msg ("WSSR before starting GA: " + S(wssr_before_));
00095 assert (pop && opop);
00096 if (elitism >= popsize) {
00097 F_->warn ("hmm, now elitism >= popsize, setting elitism = 1");
00098 elitism = 1;
00099 }
00100 for (int iter = 0; !termination_criteria_and_print_info(iter); iter++) {
00101 autoplot_in_autoiter();
00102 ++iter_nr_;
00103 pre_selection();
00104 crossover();
00105 mutation();
00106 post_selection();
00107 }
00108 post_fit (best_indiv.g, best_indiv.raw_score);
00109 }
00110
00111 void GAfit::compute_wssr_for_ind (vector<Individual>::iterator ind)
00112 {
00113 ind->raw_score = compute_wssr(ind->g, dmdm_);
00114 ind->generation = iter_nr_;
00115 }
00116
00117 void GAfit::autoplot_in_autoiter()
00118 {
00119 const Individual& indiv = is_index(autoplot_indiv_nr, *pop)
00120 ? (*pop)[autoplot_indiv_nr] : best_indiv;
00121 iteration_plot(indiv.g, indiv.raw_score);
00122 }
00123
00124 void GAfit::mutation()
00125 {
00126 for (vector<Individual>::iterator i = pop->begin(); i != pop->end(); i++) {
00127 if (mutate_all_genes) {
00128 if (rand() < RAND_MAX * p_mutation) {
00129 for (int j = 0; j < na_; j++)
00130 i->g[j] = draw_a_from_distribution(j, mutation_type,
00131 mutation_strength);
00132 compute_wssr_for_ind (i);
00133 }
00134 }
00135 else
00136 for (int j = 0; j < na_; j++)
00137 if (rand() < RAND_MAX * p_mutation) {
00138 i->g[j] = draw_a_from_distribution(j, mutation_type,
00139 mutation_strength);
00140 compute_wssr_for_ind (i);
00141 }
00142 }
00143 }
00144
00145 void GAfit::crossover()
00146 {
00147 for (vector<Individual>::iterator i = pop->begin(); i != pop->end(); i++)
00148 if (rand() < RAND_MAX / 2 * p_crossover) {
00149 vector<Individual>::iterator i2 = pop->begin() + rand()%pop->size();
00150 switch (crossover_type) {
00151 case 'u':
00152 uniform_crossover (i, i2);
00153 break;
00154 case 'o':
00155 one_point_crossover (i, i2);
00156 break;
00157 case 't':
00158 two_points_crossover (i, i2);
00159 break;
00160 case 'a':
00161 arithmetic_crossover1 (i, i2);
00162 break;
00163 case 'A':
00164 arithmetic_crossover2 (i, i2);
00165 break;
00166 case 'g':
00167 guaranteed_avarage_crossover (i, i2);
00168 break;
00169 default:
00170 F_->warn ("No such crossover-type: " + S(crossover_type)
00171 + ". Setting to 'u'");
00172 crossover_type = 'u';
00173 uniform_crossover (i, i2);
00174 break;
00175 }
00176 compute_wssr_for_ind (i);
00177 compute_wssr_for_ind (i2);
00178 }
00179 }
00180
00181 void GAfit::uniform_crossover (vector<Individual>::iterator c1,
00182 vector<Individual>::iterator c2)
00183 {
00184 for (int i = 0; i < na_; i++)
00185 if (rand() % 2)
00186 swap(c1->g[i], c2->g[i]);
00187 }
00188
00189 void GAfit::one_point_crossover (vector<Individual>::iterator c1,
00190 vector<Individual>::iterator c2)
00191 {
00192 int p = rand() % na_;
00193 for (int j = 0; j < p; j++)
00194 swap(c1->g[j], c2->g[j]);
00195 }
00196
00197 void GAfit::two_points_crossover (vector<Individual>::iterator c1,
00198 vector<Individual>::iterator c2)
00199 {
00200 int p1 = rand() % na_;
00201 int p2 = rand() % na_;
00202 for (int j = min(p1, p2); j < max(p1, p2); j++)
00203 swap(c1->g[j], c2->g[j]);
00204 }
00205
00206 void GAfit::arithmetic_crossover1 (vector<Individual>::iterator c1,
00207 vector<Individual>::iterator c2)
00208 {
00209 realt a = rand_0_1();
00210 for (int j = 0; j < na_; j++) {
00211 c1->g[j] = a * c1->g[j] + (1 - a) * c2->g[j];
00212 c2->g[j] = (1 - a) * c1->g[j] + a * c2->g[j]; ;
00213 }
00214 }
00215
00216 void GAfit::arithmetic_crossover2 (vector<Individual>::iterator c1,
00217 vector<Individual>::iterator c2)
00218 {
00219 for (int j = 0; j < na_; j++) {
00220 realt a = rand_0_1();
00221 c1->g[j] = a * c1->g[j] + (1 - a) * c2->g[j];
00222 c2->g[j] = (1 - a) * c1->g[j] + a * c2->g[j]; ;
00223 }
00224 }
00225
00226 void GAfit::guaranteed_avarage_crossover (vector<Individual>::iterator c1,
00227 vector<Individual>::iterator c2)
00228 {
00229 for (int j = 0; j < na_; j++)
00230 c1->g[j] = c2->g[j] = (c1->g[j] + c2->g[j]) / 2;
00231 }
00232
00233 void GAfit::pre_selection()
00234 {
00235 vector<int> next(popsize - elitism);
00236 switch (selection_type) {
00237 case 'r':
00238 scale_score();
00239 roulette_wheel_selection(next);
00240 break;
00241 case 't':
00242 tournament_selection(next);
00243 break;
00244 case 's':
00245 scale_score();
00246 stochastic_remainder_sampling(next);
00247 break;
00248 case 'd':
00249 scale_score();
00250 deterministic_sampling_selection(next);
00251 break;
00252 default:
00253 F_->warn ("No such selection-type: " + S((char)selection_type)
00254 + ". Setting to 'r'");
00255 selection_type = 'r';
00256 pre_selection ();
00257 return;
00258 }
00259 opop->resize(next.size(), Individual(na_));
00260 for (int i = 0; i < size(next); i++)
00261 (*opop)[i] = (*pop)[next[i]];
00262 swap (pop, opop);
00263 }
00264
00265 void GAfit::post_selection()
00266 {
00267 if (elitism == 0)
00268 return;
00269 do_rank_scoring (opop);
00270 for (vector<Individual>::iterator i = opop->begin(); i != opop->end(); i++)
00271 if (i->phase_2_score < elitism)
00272 pop->push_back (*i);
00273 assert (size(*pop) == popsize);
00274 }
00275
00276 struct ind_raw_sc_cmp
00277 {
00278 bool operator() (Individual* a, Individual* b) {
00279 return a->raw_score < b->raw_score;
00280 }
00281 };
00282
00283 void GAfit::do_rank_scoring(vector<Individual> *popp)
00284 {
00285
00286
00287 static vector<Individual*> ind_p;
00288 ind_p.resize(popp->size());
00289 for (unsigned int i = 0; i < popp->size(); i++)
00290 ind_p[i] = &(*popp)[i];
00291 sort (ind_p.begin(), ind_p.end(), ind_raw_sc_cmp());
00292 for (unsigned int i = 0; i < popp->size(); i++)
00293 ind_p[i]->phase_2_score = i;
00294 }
00295
00296 void GAfit::roulette_wheel_selection(vector<int>& next)
00297 {
00298 vector<unsigned int> roulette(pop->size());
00299 unsigned int t = 0;
00300 for (int i = 0; i < size(*pop) - 1; i++) {
00301 t += static_cast<unsigned int>
00302 ((*pop)[i].norm_score * RAND_MAX / size(*pop));
00303 roulette[i] = t;
00304 }
00305 roulette[size(*pop) - 1] = RAND_MAX;
00306 for (vector<int>::iterator i = next.begin(); i != next.end(); i++)
00307 *i = lower_bound (roulette.begin(), roulette.end(),
00308 static_cast<unsigned int>(rand())) - roulette.begin();
00309 }
00310
00311 void GAfit::tournament_selection(vector<int>& next)
00312 {
00313 for (vector<int>::iterator i = next.begin(); i != next.end(); i++) {
00314 int best = rand() % pop->size();
00315 for (int j = 1; j < tournament_size; j++) {
00316 int n = rand() % pop->size();
00317 if ((*pop)[n].raw_score < (*pop)[best].raw_score)
00318 best = n;
00319 }
00320 *i = best;
00321 }
00322 }
00323
00324 vector<int>::iterator
00325 GAfit::SRS_and_DS_common (vector<int>& next)
00326 {
00327 vector<int>::iterator r = next.begin();
00328 realt f = 1.0 * next.size() / pop->size();
00329 for (unsigned int i = 0; i < pop->size(); i++) {
00330 int n = static_cast<int>((*pop)[i].norm_score * f);
00331 fill (r, min (r + n, next.end()), i);
00332 r += n;
00333 }
00334 return min (r, next.end());
00335 }
00336
00337 void GAfit::stochastic_remainder_sampling(vector<int>& next)
00338 {
00339 vector<int>::iterator r = SRS_and_DS_common (next);
00340 if (r == next.end())
00341 return;
00342 vector<int> rest_of_next (next.end() - r);
00343 copy (rest_of_next.begin(), rest_of_next.end(), r);
00344 }
00345
00346 struct Remainder_and_ptr {
00347 int ind;
00348 realt r;
00349 bool operator< (const Remainder_and_ptr &b) {
00350 return r < b.r;
00351 }
00352 };
00353
00354 void GAfit::deterministic_sampling_selection(vector<int>& next)
00355 {
00356 vector<int>::iterator r = SRS_and_DS_common (next);
00357 if (r == next.end())
00358 return;
00359 static vector<Remainder_and_ptr> rem;
00360 rem.resize(pop->size());
00361 for (unsigned int i = 0; i < pop->size(); i++) {
00362 rem[i].ind = i;
00363 realt x = (*pop)[i].norm_score;
00364 rem[i].r = x - floor(x);
00365 }
00366 int rest = next.end() - r;
00367 partial_sort (rem.begin(), rem.begin() + rest, rem.end());
00368 for (int i = 0; i < rest; i++, r++)
00369 *r = rem[i].ind;
00370 assert (r == next.end());
00371 }
00372
00373 void GAfit::scale_score ()
00374 {
00375 if (rank_scoring)
00376 do_rank_scoring(pop);
00377 else
00378 for (vector<Individual>::iterator i = pop->begin(); i !=pop->end(); i++)
00379 i->phase_2_score = i->raw_score;
00380
00381
00382 realt q = max_in_window();
00383 if (q < 0)
00384 q = std_dev_based_q();
00385 q += linear_scaling_b;
00386 realt sum = 0;
00387 for (vector<Individual>::iterator i = pop->begin(); i != pop->end(); i++) {
00388 i->reversed_score = max(q - i->phase_2_score, (realt) 0.);
00389 sum += i->reversed_score;
00390 }
00391 if (sum == 0)
00392 return;
00393 realt avg_rev_sc = sum / pop->size();
00394 for (vector<Individual>::iterator i = pop->begin(); i != pop->end(); i++)
00395 i->norm_score = i->reversed_score / avg_rev_sc;
00396 }
00397
00398 realt GAfit::std_dev_based_q()
00399 {
00400 realt sum_p = 0, sum_p2 = 0;
00401 for (vector<Individual>::iterator i = pop->begin(); i != pop->end(); i++) {
00402 sum_p += i->phase_2_score;
00403 sum_p2 += i->phase_2_score * i->phase_2_score;
00404 }
00405 realt avg_p2 = sum_p2 / pop->size();
00406 realt avg_p = sum_p / pop->size();
00407 realt sq_sigma = avg_p2 - avg_p * avg_p;
00408 realt sigma = sq_sigma > 0 ? sqrt (sq_sigma) : 0;
00409 return linear_scaling_a * avg_p + linear_scaling_c * sigma;
00410 }
00411
00412 realt GAfit::max_in_window ()
00413 {
00414
00415
00416 const int hist_len = 200;
00417 static deque<realt> max_raw_history (hist_len, 0);
00418 max_raw_history.push_front (tmp_max);
00419 max_raw_history.pop_back();
00420 assert (window_size <= hist_len);
00421 if (window_size > 0) {
00422 if (rank_scoring)
00423 return size(*pop) - 1;
00424 else
00425 return *max_element (max_raw_history.begin(),
00426 max_raw_history.begin() + window_size);
00427 }
00428 else
00429 return -1;
00430 }
00431
00432 bool GAfit::termination_criteria_and_print_info (int iter)
00433 {
00434 static int no_progress_iters = 0;
00435 realt sum = 0;
00436 realt min = pop->front().raw_score;
00437 tmp_max = min;
00438 vector<Individual>::iterator ibest = pop->begin();
00439 for (vector<Individual>::iterator i = pop->begin(); i != pop->end(); i++) {
00440 if (i->raw_score < min) {
00441 min = i->raw_score;
00442 ibest = i;
00443 }
00444 if (i->raw_score > tmp_max)
00445 tmp_max = i->raw_score;
00446 sum += i->raw_score;
00447 }
00448 realt avg = sum / pop->size();
00449 realt sq_sum = 0;
00450 realt generations_sum = 0;
00451 for (vector<Individual>::iterator i = pop->begin(); i != pop->end(); i++) {
00452 realt d = i->raw_score - avg;
00453 sq_sum += d * d;
00454 generations_sum += i->generation;
00455 }
00456 realt std_dev = sq_sum > 0 ? sqrt (sq_sum / pop->size()) : 0;
00457 F_->msg("Population #" + S(iter_nr_) + ": best " + S(min)
00458 + ", avg " + S(avg) + ", worst " + S(tmp_max)
00459 + ", std dev. " + S(std_dev));
00460 if (min < best_indiv.raw_score) {
00461 best_indiv = *ibest;
00462 no_progress_iters = 0;
00463 }
00464 else
00465 no_progress_iters ++;
00466
00467
00468 bool stop = false;
00469
00470 if (common_termination_criteria(iter))
00471 stop = true;
00472 if (std_dev < std_dev_stop * avg) {
00473 F_->msg("Standard deviation of results is small enough to stop");
00474 stop = true;
00475 }
00476 if (iter_with_no_progresss_stop > 0
00477 && no_progress_iters >= iter_with_no_progresss_stop) {
00478 F_->msg("No progress in " + S(no_progress_iters) + " iterations. Stop");
00479 stop = true;
00480 }
00481 return stop;
00482 }
00483
00484