FORM v5.0.0-35-g6318119
optimize.cc
Go to the documentation of this file.
1
6/* #[ License : */
7/*
8 * Copyright (C) 1984-2026 J.A.M. Vermaseren
9 * When using this file you are requested to refer to the publication
10 * J.A.M.Vermaseren "New features of FORM" math-ph/0010025
11 * This is considered a matter of courtesy as the development was paid
12 * for by FOM the Dutch physics granting agency and we would like to
13 * be able to track its scientific use to convince FOM of its value
14 * for the community.
15 *
16 * This file is part of FORM.
17 *
18 * FORM is free software: you can redistribute it and/or modify it under the
19 * terms of the GNU General Public License as published by the Free Software
20 * Foundation, either version 3 of the License, or (at your option) any later
21 * version.
22 *
23 * FORM is distributed in the hope that it will be useful, but WITHOUT ANY
24 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
26 * details.
27 *
28 * You should have received a copy of the GNU General Public License along
29 * with FORM. If not, see <http://www.gnu.org/licenses/>.
30 */
31/*
32 #] License :
33 #[ includes :
34*/
35
36//#define DEBUG
37//#define DEBUG_MORE
38//#define DEBUG_MCTS
39//#define DEBUG_GREEDY
40
41#ifdef HAVE_CONFIG_H
42#ifndef CONFIG_H_INCLUDED
43#define CONFIG_H_INCLUDED
44#include <config.h>
45#endif
46#endif
47
48#include <vector>
49#include <stack>
50#include <algorithm>
51#include <set>
52#include <map>
53#include <climits>
54#include <cmath>
55#include <string>
56#include <algorithm>
57#include <iostream>
58
59#ifdef APPLE64
60#ifndef HAVE_UNORDERED_MAP
61#define HAVE_UNORDERED_MAP
62#endif
63#ifndef HAVE_UNORDERED_SET
64#define HAVE_UNORDERED_SET
65#endif
66#endif
67
68#ifdef HAVE_UNORDERED_MAP
69 #include <unordered_map>
70 using std::unordered_map;
71#elif !defined(HAVE_TR1_UNORDERED_MAP) && defined(HAVE_BOOST_UNORDERED_MAP_HPP)
72 #include <boost/unordered_map.hpp>
73 using boost::unordered_map;
74#else
75 #include <tr1/unordered_map>
76 using std::tr1::unordered_map;
77#endif
78#ifdef HAVE_UNORDERED_SET
79 #include <unordered_set>
80 using std::unordered_set;
81#elif !defined(HAVE_TR1_UNORDERED_SET) && defined(HAVE_BOOST_UNORDERED_SET_HPP)
82 #include <boost/unordered_set.hpp>
83 using boost::unordered_set;
84#else
85 #include <tr1/unordered_set>
86 using std::tr1::unordered_set;
87#endif
88
89#if defined(HAVE_BUILTIN_POPCOUNT)
90 static inline int popcount(unsigned int x) {
91 return __builtin_popcount(x);
92 }
93#elif defined(HAVE_POPCNT)
94 #include <intrin.h>
95 static inline int popcount(unsigned int x) {
96 return __popcnt(x);
97 }
98#else
99 static inline int popcount(unsigned int x) {
100 int count = 0;
101 while (x > 0) {
102 if ((x & 1) == 1) count++;
103 x >>= 1;
104 }
105 return count;
106 }
107#endif
108
109extern "C" {
110#include "form3.h"
111}
112
113//#ifdef DEBUG
114#include "mytime.h"
115//#endif
116
117using namespace std;
118
119// operators
120const WORD OPER_ADD = -1;
121const WORD OPER_MUL = -2;
122const WORD OPER_COMMA = -3;
123
124// class for a node of the MCTS tree
126public:
127 vector<tree_node> childs;
128 double sum_results;
129 int num_visits;
130 WORD var;
131 bool finished;
132
133 tree_node (int _var=0):
134 sum_results(0), num_visits(0), var(_var), finished(false) {}
135
136 tree_node(const tree_node& other):
137 childs(other.childs),
138 sum_results(other.sum_results),
139 num_visits(other.num_visits),
140 var(other.var),
141 finished(other.finished) {}
142
143 tree_node& operator=(const tree_node& other) {
144 if (this != &other) {
145 childs = other.childs;
146 sum_results = other.sum_results;
147 num_visits = other.num_visits;
148 var = other.var;
149 finished = other.finished;
150 }
151 return *this;
152 }
153};
154
155// global variables for multithreading
156WORD *optimize_expr;
157vector<vector<WORD> > optimize_best_Horner_schemes;
158int optimize_num_vars;
159int optimize_best_num_oper;
160vector<WORD> optimize_best_instr;
161vector<WORD> optimize_best_vars;
162
163// global variables for MCTS
164bool mcts_factorized, mcts_separated;
165vector<WORD> mcts_vars;
166tree_node mcts_root;
167int mcts_expr_score;
168set<pair<int,vector<WORD> > > mcts_best_schemes;
169
170#ifdef WITHPTHREADS
171pthread_mutex_t optimize_lock;
172#endif
173
174/*
175 #] includes :
176 #[ print_instr :
177*/
178
179void print_instr (const vector<WORD> &instr, WORD num)
180{
181 const WORD *tbegin = &*instr.begin();
182 const WORD *tend = tbegin+instr.size();
183 for (const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
184 MesPrint("<%d> %a",num,t[2],t);
185 }
186}
187
188/*
189 #] print_instr :
190 #[ my_random_shuffle :
191*/
192
200template <class RandomAccessIterator>
201void my_random_shuffle (PHEAD RandomAccessIterator fr, RandomAccessIterator to) {
202 for (int i=to-fr-1; i>0; --i)
203 swap (fr[i],fr[wranf(BHEAD0) % (i+1)]);
204}
205
206/*
207 #] my_random_shuffle :
208 #[ get_expression :
209*/
210
211static WORD comlist[3] = {TYPETOPOLYNOMIAL,3,DOALL};
212
225LONG get_expression (int exprnr) {
226
227 GETIDENTITY;
228
229 AR.NoCompress = 1;
230
231 NewSort(BHEAD0);
232 EXPRESSIONS e = Expressions+exprnr;
233 SetScratch(AR.infile,&(e->onfile));
234
235 // get header term
236 WORD *term = AT.WorkPointer;
237 GetTerm(BHEAD term);
238
239 NewSort(BHEAD0);
240
241 // get terms
242 while (GetTerm(BHEAD term) > 0) {
243 AT.WorkPointer = term + *term;
244 WORD *t1 = term;
245 WORD *t2 = term + *term;
246 if (ConvertToPoly(BHEAD t1,t2,comlist,1) < 0) return -1;
247 int n = *t2;
248 NCOPY(t1,t2,n);
249 AT.WorkPointer = term + *term;
250 if (StoreTerm(BHEAD term)) return -1;
251 }
252
253 // sort and store in buffer
254 LONG len = EndSort(BHEAD (WORD *)((void *)(&optimize_expr)),2);
256 AT.WorkPointer = term;
257
258 return len;
259}
260
261/*
262 #] get_expression :
263 #[ PF_get_expression :
264*/
265#ifdef WITHMPI
266
267// get_expression for ParFORM
268LONG PF_get_expression (int exprnr) {
269 LONG len;
270 if (PF.me == MASTER) {
271 len = get_expression(exprnr);
272 }
273 if (PF.numtasks > 1) {
274 PF_BroadcastBuffer(&optimize_expr, &len);
275 }
276 return len;
277}
278
279// replace get_expression called in Optimize
280#define get_expression PF_get_expression
281
282#endif
283/*
284 #] PF_get_expression :
285 #[ get_brackets :
286*/
287
302vector<vector<WORD> > get_brackets () {
303
304 // check for brackets in expression
305 bool has_brackets = false;
306 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
307 WORD *tend=t+*t; tend-=ABS(*(tend-1));
308 for (WORD *t1=t+1; t1<tend; t1+=*(t1+1))
309 if (*t1 == HAAKJE)
310 has_brackets=true;
311 }
312
313 // replace brackets by SEPARATESYMBOL
314 vector<vector<WORD> > brackets;
315
316 if (has_brackets) {
317 int exprlen=10; // we need potential space for an empty bracket
318 for (WORD *t=optimize_expr; *t!=0; t+=*t)
319 exprlen += *t;
320 WORD *newexpr = (WORD *)Malloc1(exprlen*sizeof(WORD), "optimize newexpr");
321
322 int i=0;
323 int sep_power = 0;
324
325 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
326 WORD *t1 = t+1;
327
328 // scan for bracket
329 vector<WORD> bracket;
330 for (; *t1!=HAAKJE; t1+=*(t1+1))
331 bracket.insert(bracket.end(), t1, t1+*(t1+1));
332
333 if (brackets.size()==0 || bracket!=brackets.back()) {
334 sep_power++;
335 brackets.push_back(bracket);
336 }
337 t1+=*(t1+1);
338
339 WORD left = t + *t - t1;
340 bool more_symbols = (left != ABS(*(t+*t-1)));
341
342 // add power of SEPARATESYMBOL
343 newexpr[i++] = 1 + left + (more_symbols ? 2 : 4);
344 newexpr[i++] = SYMBOL;
345 newexpr[i++] = (more_symbols ? *(t1+1) + 2 : 4);
346 newexpr[i++] = SEPARATESYMBOL;
347 newexpr[i++] = sep_power;
348
349 // add remaining terms
350 if (more_symbols) {
351 t1+=2;
352 left-=2;
353 }
354 while (left-->0)
355 newexpr[i++] = *(t1++);
356 }
357/*
358 We insert here an empty bracket that is zero.
359 It is used for the case that there is only a single bracket which is
360 outside the notation for trees at a later stage.
361
362 There may be a problem now in that in the case of sep_power==1
363 newexpr is bigger than optimize_expr. We have to check that.
364*/
365 if ( sep_power == 1 )
366 {
367 WORD *t;
368 vector<WORD> bracket;
369 bracket.push_back(0);
370 bracket.push_back(0);
371 bracket.push_back(0);
372 bracket.push_back(0);
373 sep_power++;
374 brackets.push_back(bracket);
375 newexpr[i++] = 8;
376 newexpr[i++] = SYMBOL;
377 newexpr[i++] = 4;
378 newexpr[i++] = SEPARATESYMBOL;
379 newexpr[i++] = sep_power;
380 newexpr[i++] = 1;
381 newexpr[i++] = 1;
382 newexpr[i++] = 3;
383 newexpr[i++] = 0;
384 for (t=optimize_expr; *t!=0; t+=*t) {}
385 if ( t-optimize_expr+1 < i ) { // We have to redo this
386 M_free(optimize_expr,"$-sort space");
387 optimize_expr = (WORD *)Malloc1(i*sizeof(WORD),"$-sort space");
388 }
389 }
390 else {
391 newexpr[i++] = 0;
392 }
393 memcpy(optimize_expr, newexpr, i*sizeof(WORD));
394 M_free(newexpr,"optimize newexpr");
395
396 // if factorized, replace SEP by FAC and remove brackets
397 if (brackets[0].size()>0 && brackets[0][2]==FACTORSYMBOL) {
398 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
399 if (*t == ABS(*(t+*t-1))+1) continue;
400 if (t[1]==SYMBOL)
401 for (int i=3; i<t[2]; i+=2)
402 if (t[i]==SEPARATESYMBOL) t[i]=FACTORSYMBOL;
403 }
404 return vector<vector<WORD> >();
405 }
406 }
407
408 return brackets;
409}
410
411/*
412 #] get_brackets :
413 #[ count_operators :
414*/
415
422int count_operators (const WORD *expr, bool print=false) {
423
424 int n=0;
425 while (*(expr+n)!=0) n+=*(expr+n);
426
427 int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
428 WORD maxpowfac=1, maxpowsep=1;
429
430 for (const WORD *t=expr; *t!=0; t+=*t) {
431 if (t!=expr) cntadd++; // new term
432 if (*t==ABS(*(t+*t-1))+1) continue; // only coefficient
433
434 int cntsym=0;
435
436 if (t[1]==SYMBOL)
437 for (int i=3; i<t[2]; i+=2) {
438 if (t[i]==FACTORSYMBOL) {
439 maxpowfac = max(maxpowfac, t[i+1]);
440 continue;
441 }
442 if (t[i]==SEPARATESYMBOL) {
443 maxpowsep = max(maxpowsep, t[i+1]);
444 continue;
445 }
446 if (t[i+1]>2) { // (extra)symbol power>2
447 cntpow++;
448 sumpow += (int)floor(log(t[i+1])/log(2.0)) + popcount(t[i+1]) - 1;
449 }
450 if (t[i+1]==2) cntmul++; // (extra)symbol squared
451 cntsym++;
452 }
453
454 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntsym++; // non +/-1 coefficient
455
456 if (cntsym > 0) cntmul+=cntsym-1;
457 }
458
459 cntadd -= maxpowfac-1;
460 cntmul += maxpowfac-1;
461
462 cntadd -= maxpowsep-1;
463
464 if (print)
465 MesPrint ("*** STATS: original %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
466
467 return sumpow+cntmul+cntadd;
468}
469
476int count_operators (const vector<WORD> &instr, bool print=false) {
477
478 int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
479
480 const WORD *ebegin = &*instr.begin();
481 const WORD *eend = ebegin+instr.size();
482
483 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
484 for (const WORD *t=e+3; *t!=0; t+=*t) {
485 if (t!=e+3) {
486 if (*(e+1)==OPER_ADD) cntadd++; // new term
487 if (*(e+1)==OPER_MUL) cntmul++; // new term
488 }
489 if (*t == ABS(*(t+*t-1))+1) continue; // only coefficient
490 if (*(t+1)==SYMBOL || *(t+1)==EXTRASYMBOL) {
491 if (*(t+4)==2) cntmul++; // (extra)symbol squared
492 if (*(t+4)>2) { // (extra)symbol power>2
493 cntpow++;
494 sumpow += (int)floor(log(*(t+4))/log(2.0)) + popcount(*(t+4)) - 1;
495 }
496 }
497 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntmul++; // non +/-1 coefficient
498 }
499 }
500
501 if (print)
502 MesPrint ("*** STATS: optimized %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
503
504 return sumpow+cntmul+cntadd;
505}
506
507/*
508 #] count_operators :
509 #[ occurrence_order :
510*/
511
519vector<WORD> occurrence_order (const WORD *expr, bool rev) {
520
521 // count the number of occurrences of variables
522 map<WORD,int> cnt;
523 for (const WORD *t=expr; *t!=0; t+=*t) {
524 if (*t == ABS(*(t+*t-1))+1) continue; // skip constant term
525 if (t[1] == SYMBOL)
526 for (int i=3; i<t[2]; i+=2)
527 cnt[t[i]]++;
528 }
529
530 bool is_fac=false, is_sep=false;
531 if (cnt.count(FACTORSYMBOL)) {
532 is_fac=true;
533 cnt.erase(FACTORSYMBOL);
534 }
535 if (cnt.count(SEPARATESYMBOL)) {
536 is_sep=true;
537 cnt.erase(SEPARATESYMBOL);
538 }
539
540 // determine the order of the variables
541 vector<pair<int,WORD> > cnt_order;
542 for (map<WORD,int>::iterator i=cnt.begin(); i!=cnt.end(); i++)
543 cnt_order.push_back(make_pair(i->second, i->first));
544 sort(cnt_order.rbegin(), cnt_order.rend());
545
546 // create resulting order
547 vector<WORD> order;
548 for (int i=0; i<(int)cnt_order.size(); i++)
549 order.push_back(cnt_order[i].second);
550
551 if (rev) reverse(order.begin(),order.end());
552
553 // add FACTORSYMBOL/SEPARATESYMBOL
554 if (is_fac) order.insert(order.begin(), FACTORSYMBOL);
555 if (is_sep) order.insert(order.begin(), SEPARATESYMBOL);
556
557 return order;
558}
559
560/*
561 #] occurrence_order :
562 #[ Horner_tree :
563*/
564
600WORD getpower (const WORD *term, int var, int pos) {
601 if (*term == ABS(*(term+*term-1))+1) return 0; // constant term
602 if (2*pos+2 >= term[2]) return 0; // too few symbols
603 if (term[2*pos+3] != var) return 0; // incorrect symbol
604 return term[2*pos+4]; // return power
605}
606
614void fixarg (UWORD *t, WORD &n) {
615 int an=ABS(n), sn=SGN(n);
616 while (*(t+an-1)==0) an--;
617 n=an*sn;
618}
619
620void GcdLong_fix_args (PHEAD UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc) {
621 fixarg(a,na);
622 fixarg(b,nb);
623 GcdLong(BHEAD a,na,b,nb,c,nc);
624}
625
626void DivLong_fix_args(UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc, UWORD *d, WORD *nd) {
627 fixarg(a,na);
628 fixarg(b,nb);
629 DivLong(a,na,b,nb,c,nc,d,nd);
630}
631
652void build_Horner_tree (const WORD **terms, int numterms, int var, int maxvar, int pos, vector<WORD> *res) {
653
654 GETIDENTITY;
655
656 if (var == maxvar) {
657 // Horner tree is finished, so add remaining terms unfactorized
658 // (note: since only complete Horner schemes seem to be useful, numterms=1 here
659
660 for (int fr=0; fr<numterms; fr++) {
661
662 bool empty = true;
663 const WORD *t = terms[fr];
664
665 // add symbols
666 if (*t != ABS(*(t+*t-1))+1)
667 for (int i=3+2*pos; i<t[2]; i+=2) {
668 res->push_back(SYMBOL);
669 res->push_back(4);
670 res->push_back(t[i]);
671 res->push_back(t[i+1]);
672 if (!empty) res->push_back(OPER_MUL);
673 empty = false;
674 }
675
676 // if empty, add a 1, since the result should look like "... coeff *"
677 if (empty) {
678 res->push_back(SNUMBER);
679 res->push_back(5);
680 res->push_back(1);
681 res->push_back(1);
682 res->push_back(3);
683 }
684
685 // add coefficient
686 res->push_back(SNUMBER);
687 WORD n = ABS(*(t+*t-1));
688 res->push_back(n+2);
689 for (int i=0; i<n; i++)
690 res->push_back(*(t+*t-n+i));
691 res->push_back(OPER_MUL);
692
693 if (fr>0) res->push_back(OPER_ADD);
694 }
695
696 // result should end with gcd of the terms; right now this never
697 // triggers, but if one would allow for incomplete Horner schemes,
698 // one should extract the gcd here
699 if (numterms > 1) {
700 res->push_back(SNUMBER);
701 res->push_back(5);
702 res->push_back(1);
703 res->push_back(1);
704 res->push_back(3);
705 res->push_back(OPER_MUL);
706 }
707 }
708 else {
709 // extract variable "var" and the gcd and proceed recursively
710
711 WORD nnum = 0, nden = 0, ntmp = 0, ndum = 0;
712 UWORD *num = NumberMalloc("build_Horner_tree");
713 UWORD *den = NumberMalloc("build_Horner_tree");
714 UWORD *tmp = NumberMalloc("build_Horner_tree");
715 UWORD *dum = NumberMalloc("build_Horner_tree");
716
717 // previous coefficient for gcd extraction or coefficient multiplication
718 int prev_coeff_idx = -1;
719
720 for (int fr=0; fr<numterms;) {
721
722 // find power of current term
723 WORD pow = getpower(terms[fr], var, pos);
724
725 // find all terms with that power
726 int to=fr+1;
727 while (to<numterms && getpower(terms[to], var, pos) == pow) to++;
728
729 // recursively build Horner tree of all terms proportional to var^pow
730 build_Horner_tree (terms+fr, to-fr, var+1, maxvar, pow==0?pos:pos+1, res);
731
732 if (AN.poly_vars[var] != FACTORSYMBOL && AN.poly_vars[var] != SEPARATESYMBOL) {
733 // if normal symbol, find gcd(numerators) and gcd(denominators)
734 WORD n1 = res->at(res->size()-2) / 2;
735 WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
736
737 WORD *t2 = fr==0 ? t1 : &res->at(prev_coeff_idx);
738 WORD n2 = fr==0 ? n1 : *(t2+*(t2+1)-1) / 2;
739 if (fr>0) t2+=2;
740
741 GcdLong_fix_args(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,num,&nnum);
742 GcdLong_fix_args(BHEAD (UWORD *)t1+ABS(n1),ABS(n1),(UWORD *)t2+ABS(n2),ABS(n2),den,&nden);
743
744 // divide out gcds; note: leading zeroes can be added here
745 for (int i=0; i<2; i++) {
746 if (i==1 && fr==0) break;
747
748 WORD *t = i==0 ? t1 : t2;
749 WORD n = i==0 ? n1 : n2;
750
751 DivLong_fix_args((UWORD *)t, n, num, nnum, tmp, &ntmp, dum, &ndum);
752 for (int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
753 for (int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
754
755 if (SGN(ntmp) != SGN(n)) n=-n;
756
757 DivLong_fix_args((UWORD *)t, n, den, nden, tmp, &ntmp, dum, &ndum);
758 for (int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
759 for (int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
760
761 *t++ = SGN(n) * (2*ABS(n)+1);
762 }
763
764 // add the addition operator
765 if (fr>0) res->push_back(OPER_ADD);
766
767 // add the power of "var"
768 WORD nextpow = (to==numterms ? 0 : getpower(terms[to], var, pos));
769
770 if (pow-nextpow > 0) {
771 res->push_back(SYMBOL);
772 res->push_back(4);
773 res->push_back(var);
774 res->push_back(pow-nextpow);
775 res->push_back(OPER_MUL);
776 }
777
778 // add the extracted gcd
779 res->push_back(SNUMBER);
780 WORD n = MaX(ABS(nnum),nden);
781 res->push_back(n*2+3);
782 for (int i=0; i<ABS(nnum); i++) res->push_back(num[i]);
783 for (int i=0; i<n-ABS(nnum); i++) res->push_back(0);
784 for (int i=0; i<nden; i++) res->push_back(den[i]);
785 for (int i=0; i<n-ABS(nden); i++) res->push_back(0);
786 res->push_back(SGN(nnum)*(2*n+1));
787 res->push_back(OPER_MUL);
788
789 prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
790 }
791 else if (AN.poly_vars[var]==FACTORSYMBOL) {
792
793 // if factorsymbol, multiply overall integer contents
794
795 if (fr>0) {
796 WORD n1 = res->at(res->size()-2) / 2;
797 WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
798 WORD *t2 = &res->at(prev_coeff_idx);
799 WORD n2 = *(t2+*(t2+1)-1) / 2;
800 t2+=2;
801
802 MulRat(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,tmp,&ntmp);
803
804 // replace previous coefficient with 1
805 n2=ABS(n2);
806 for (int i=0; i<ABS(n2); i++)
807 t2[i] = t2[n2+i] = i==0 ? 1 : 0;
808 t2[2*n2] = 2*n2+1;
809
810 // remove this coefficient
811 for (int i=0; i<2*ABS(n1)+2; i++)
812 res->pop_back();
813
814 // add product
815 res->back() = 2*ABS(ntmp)+3; // adjust size of term
816 res->insert(res->end(), tmp, tmp+2*ABS(ntmp)); // num/den coefficient
817 res->push_back(SGN(ntmp)*(2*ABS(ntmp)+1)); // size of coefficient
818 res->push_back(OPER_MUL); // operator
819 }
820
821 prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
822
823 // multiply previous factors with this factor
824 if (fr>0)
825 res->push_back(OPER_MUL);
826 }
827 else { // AN.poly_vars[var]==SEPARATESYMBOL
828 if (fr>0)
829 res->push_back(OPER_COMMA);
830 prev_coeff_idx = -1;
831 }
832
833 fr=to;
834 }
835
836 // cleanup
837 NumberFree(dum,"build_Horner_tree");
838 NumberFree(tmp,"build_Horner_tree");
839 NumberFree(den,"build_Horner_tree");
840 NumberFree(num,"build_Horner_tree");
841 }
842}
843
852bool term_compare (const WORD *a, const WORD *b) {
853 if (*a == ABS(*(a+*a-1))+1) return true; // coefficient comes first
854 if (*b == ABS(*(b+*b-1))+1) return false;
855 if (a[1]!=SYMBOL) return true;
856 if (b[1]!=SYMBOL) return false;
857 for (int i=3; i<a[2] && i<b[2]; i+=2) {
858 if (a[i] !=b[i] ) return a[i] >b[i];
859 if (a[i+1]!=b[i+1]) return a[i+1]<b[i+1];
860 }
861 return a[2]<b[2];
862}
863
873vector<WORD> Horner_tree (const WORD *expr, const vector<WORD> &order) {
874#ifdef DEBUG
875 MesPrint ("*** [%s, w=%w] CALL: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
876#endif
877
878 GETIDENTITY;
879
880 // find the renumbering scheme (new numbers are 0,1,...,#vars-1)
881 map<WORD,WORD> renum;
882 AN.poly_num_vars = order.size();
883 AN.poly_vars = (WORD *)Malloc1(AN.poly_num_vars*sizeof(WORD), "AN.poly_vars");
884 for (int i=0; i<AN.poly_num_vars; i++) {
885 AN.poly_vars[i] = order[i];
886 renum[order[i]] = i;
887 }
888
889 // sort variables in individual terms using bubble sort
890 WORD* sorted;
891 WORD* dynamicAlloc = 0;
892 LONG sumsize = 0;
893
894 for (const WORD *t=expr; *t!=0; t+=*t) {
895 sumsize += *t;
896 }
897
898 // We actually need sumsize + 1 WORDS available, due to the "*sorted = 0;"
899 // at the end of the sort loop.
900 if ( AT.WorkPointer + sumsize + 1 > AT.WorkTop ) {
901 dynamicAlloc = (WORD*)Malloc1(sizeof(*dynamicAlloc)*(sumsize+1), "Horner_tree alloc");
902 sorted = dynamicAlloc;
903 }
904 else {
905 sorted = AT.WorkPointer;
906 }
907
908 for (const WORD *t=expr; *t!=0; t+=*t) {
909 memcpy(sorted, t, *t*sizeof(WORD));
910
911 if (*t != ABS(*(t+*t-1))+1) {
912 // non-constant term
913
914 // renumber variables, adding new variables if necessary
915 for (int i=3; i<sorted[2]; i+=2) {
916 if (!renum.count(sorted[i])) {
917 renum[sorted[i]] = AN.poly_num_vars;
918
919 WORD *new_poly_vars = (WORD *)Malloc1((AN.poly_num_vars+1)*sizeof(WORD), "AN.poly_vars");
920 memcpy(new_poly_vars, AN.poly_vars, AN.poly_num_vars*sizeof(WORD));
921 M_free(AN.poly_vars,"poly_vars");
922 AN.poly_vars = new_poly_vars;
923 AN.poly_vars[AN.poly_num_vars] = sorted[i];
924 AN.poly_num_vars++;
925 }
926 sorted[i] = renum[sorted[i]];
927 }
928 // order variables
929 for (int i=0; i<sorted[2]/2; i++)
930 for (int j=3; j+2<sorted[2]; j+=2)
931 if (sorted[j] > sorted[j+2]) {
932 swap(sorted[j] , sorted[j+2]);
933 swap(sorted[j+1], sorted[j+3]);
934 }
935 }
936
937 sorted += *sorted;
938 }
939
940 *sorted = 0;
941 if ( dynamicAlloc ) {
942 sorted = dynamicAlloc;
943 }
944 else {
945 sorted = AT.WorkPointer;
946 }
947
948 // find pointers to all terms and sort them efficiently
949 vector<const WORD *> terms;
950 for (const WORD *t=sorted; *t!=0; t+=*t)
951 terms.push_back(t);
952 sort(terms.rbegin(),terms.rend(),term_compare);
953
954 // construct the Horner tree
955 vector<WORD> res;
956 build_Horner_tree(&terms[0], terms.size(), 0, AN.poly_num_vars, 0, &res);
957
958 // remove leading zeroes in coefficients
959 int j=0;
960 for (int i=0; i<(int)res.size();) {
961 if (res[i]==OPER_ADD || res[i]==OPER_MUL || res[i]==OPER_COMMA)
962 res[j++] = res[i++];
963 else if (res[i]==SYMBOL) {
964 memmove(&res[j], &res[i], res[i+1]*sizeof(WORD));
965 i+=res[j+1];
966 j+=res[j+1];
967 }
968 else if (res[i]==SNUMBER) {
969 int n = (res[i+1]-2)/2;
970 int dn = 0;
971 while (res[i+1+n-dn]==0 && res[i+1+2*n-dn]==0) dn++;
972 res[j++] = SNUMBER;
973 res[j++] = 2*(n-dn) + 3;
974 memmove(&res[j], &res[i+2], (n-dn)*sizeof(WORD));
975 j+=n-dn;
976 memmove(&res[j], &res[i+n+2], (n-dn)*sizeof(WORD));
977 j+=n-dn;
978 res[j++] = SGN(res[i+2*n+2])*(2*(n-dn)+1);
979 i+=2*n+3;
980 }
981 }
982 res.resize(j);
983
984 if ( dynamicAlloc ) {
985 M_free(dynamicAlloc, "Horner_tree alloc");
986 }
987
988#ifdef DEBUG
989 MesPrint ("*** [%s, w=%w] DONE: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
990#endif
991
992 return res;
993}
994
995/*
996 #] Horner_tree :
997 #[ print_tree :
998*/
999
1000// print Horner tree (for debugging)
1001void print_tree (const vector<WORD> &tree) {
1002
1003 GETIDENTITY;
1004
1005 for (int i=0; i<(int)tree.size();) {
1006 if (tree[i]==OPER_ADD) {
1007 MesPrint("+%");
1008 i++;
1009 }
1010 else if (tree[i]==OPER_MUL) {
1011 MesPrint("*%");
1012 i++;
1013 }
1014 else if (tree[i]==OPER_COMMA) {
1015 MesPrint(",%");
1016 i++;
1017 }
1018 else if (tree[i]==SNUMBER) {
1019 UBYTE buf[100];
1020 int n = tree[i+tree[i+1]-1]/2;
1021 PrtLong((UWORD *)&tree[i+2], n, buf);
1022 int l = strlen((char *)buf);
1023 buf[l]='/';
1024 n=ABS(n);
1025 PrtLong((UWORD *)&tree[i+2+n], n, buf+l+1);
1026 MesPrint("%s%",buf);
1027 i+=tree[i+1];
1028 }
1029 else if (tree[i]==SYMBOL) {
1030 if (AN.poly_vars[tree[i+2]] < 10000)
1031 MesPrint("%s^%d%", VARNAME(symbols,AN.poly_vars[tree[i+2]]), tree[i+3]);
1032 else
1033 MesPrint("Z%d^%d%", MAXVARIABLES-AN.poly_vars[tree[i+2]], tree[i+3]);
1034 i+=tree[i+1];
1035 }
1036 else {
1037 MesPrint("error");
1038 exit(1);
1039 }
1040
1041 MesPrint(" %");
1042 }
1043
1044 MesPrint("");
1045}
1046
1047/*
1048 #] print_tree :
1049 #[ generate_instructions :
1050*/
1051
1052
1053struct CSEHash {
1054 size_t operator()(const vector<WORD>& n) const {
1055 return n[0];
1056 }
1057};
1058
1059struct CSEEq {
1060 bool operator()(const vector<WORD>& lhs, const vector<WORD>& rhs) const {
1061 if (lhs.size() != rhs.size()) return false;
1062 for (unsigned int i = 1; i < lhs.size(); i++) {
1063 if (lhs[i] != rhs[i]) return false;
1064 }
1065 return true;
1066 }
1067};
1068
1069
1070template<typename T> size_t hash_range(T* array, int size) {
1071 size_t hash = 0;
1072
1073 for (int i = 0; i < size; i++) {
1074 hash ^= array[i] + 0x9e3779b9 + (hash << 6) + (hash >> 2);
1075 }
1076
1077 return hash;
1078}
1079
1132vector<WORD> generate_instructions (const vector<WORD> &tree, bool do_CSE) {
1133#ifdef DEBUG
1134 MesPrint ("*** [%s, w=%w] CALL: generate_instructions(cse=%d)",
1135 thetime_str().c_str(), do_CSE?1:0);
1136#endif
1137
1138 typedef unordered_map<vector<WORD>, int, CSEHash, CSEEq> csemap;
1139 csemap ID;
1140
1141 // reserve lots of space, to prevent later rehashes
1142 // TODO: what if this is too large? make a parameter?
1143 if (do_CSE) {
1144 ID.rehash(mcts_expr_score * 2);
1145 }
1146
1147 // s is a stack of operands to process when you encounter operators
1148 // in the postfix expression tree. Operands consist of three WORDs,
1149 // formatted as the LHS/RHS of the keys in ID.
1150 stack<int> s;
1151 vector<WORD> instr;
1152 WORD numinstr = 0;
1153 vector<WORD> x;
1154
1155 // process the expression tree
1156 for (int i=0; i<(int)tree.size();) {
1157 x.clear();
1158
1159 if (tree[i]==SNUMBER) {
1160 WORD hash = hash_range(&tree[i], tree[i + 1]);
1161 x.push_back(hash);
1162 x.push_back(SNUMBER);
1163 x.insert(x.end(),&tree[i],&tree[i]+tree[i+1]);
1164 int sign = SGN(x.back());
1165 x.back() *= sign;
1166
1167 std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1168 s.push(0);
1169 s.push(sign * suc.first->second);
1170 s.push(SNUMBER);
1171 s.push(hash);
1172 i+=tree[i+1];
1173 }
1174 else if (tree[i]==SYMBOL) {
1175 WORD hash = hash_range(&tree[i], tree[i + 1]);
1176 if (tree[i+3]>1) {
1177 x.push_back(hash);
1178 x.push_back(SYMBOL);
1179 x.push_back(tree[i+2]+1); // variable (1-indexed)
1180 x.push_back(tree[i+3]); // power
1181
1182 csemap::iterator it = ID.find(x);
1183 if (do_CSE && it != ID.end()) {
1184 // already-seen power of a symbol
1185 s.push(1);
1186 s.push(it->second);
1187 s.push(EXTRASYMBOL);
1188 } else {
1189 //MesPrint("strange");
1190 if (numinstr == MAXPOSITIVE) {
1191 MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1192 Terminate(-1);
1193 }
1194
1195 // new power greater than 1 of a symbol
1196 instr.push_back(numinstr); // expr.nr
1197 instr.push_back(OPER_MUL); // operator
1198 instr.push_back(9+OPTHEAD); // length total
1199 instr.push_back(8); // length operand
1200 instr.push_back(SYMBOL); // SYMBOL
1201 instr.push_back(4); // length symbol
1202 instr.push_back(tree[i+2]); // variable
1203 instr.push_back(tree[i+3]); // power
1204 instr.push_back(1); // numerator
1205 instr.push_back(1); // denominator
1206 instr.push_back(3); // length coeff
1207 instr.push_back(0); // trailing 0
1208
1209 ID[x] = ++numinstr;
1210 s.push(1);
1211 s.push(numinstr);
1212 s.push(EXTRASYMBOL);
1213 }
1214 }
1215 else {
1216 // power of 1
1217 s.push(tree[i+3]); // power
1218 s.push(tree[i+2]+1); // variable (1-indexed)
1219 s.push(SYMBOL);
1220 }
1221
1222 s.push(hash); // push hash
1223 i+=tree[i+1];
1224 }
1225 else { // tree[i]==OPERATOR
1226 int oper = tree[i];
1227 i++;
1228
1229 x.push_back(0); // placeholder for hash
1230 x.push_back(oper);
1231 vector<WORD> hash;
1232 hash.push_back(oper);
1233
1234 // get two operands from the stack
1235 for (int operand=0; operand<2; operand++) {
1236 hash.push_back(s.top()); s.pop();
1237 x.push_back(s.top()); s.pop();
1238 x.push_back(s.top()); s.pop();
1239 x.push_back(s.top()); s.pop();
1240 }
1241
1242 x[0] = hash_range(&hash[0], 3);
1243
1244 // get rid of multiplications by +/-1
1245 if (oper==OPER_MUL) {
1246 bool do_continue = false;
1247
1248 for (int operand=0; operand<2; operand++) {
1249 int idx_oper1 = operand==0 ? 2 : 5;
1250 int idx_oper2 = operand==0 ? 5 : 2;
1251
1252 // check whether operand 1 equals +/-1
1253 if (x[idx_oper1]==SNUMBER) {
1254
1255 int idx = ABS(x[idx_oper1+1])-1;
1256 if (tree[idx+2]==1 && tree[idx+3]==1 && ABS(tree[idx+4])==3) {
1257 // push +/- other operand and continue
1258 s.push(x[idx_oper2+2]);
1259 s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1260 s.push(x[idx_oper2]);
1261 s.push(hash[1 + (operand + 1) % 2]);
1262 do_continue = true;
1263 break;
1264 }
1265 }
1266 }
1267
1268 if (do_continue) continue;
1269 }
1270
1271 // check whether this subexpression has been seen before
1272 // if not, generate instruction to define it
1273 csemap::iterator it = ID.find(x);
1274 if (!do_CSE || it == ID.end()) {
1275 if (numinstr == MAXPOSITIVE) {
1276 MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1277 Terminate(-1);
1278 }
1279
1280 instr.push_back(numinstr); // expr.nr.
1281 instr.push_back(x[1]); // operator
1282 instr.push_back(3); // length
1283
1284 ID[x] = ++numinstr;
1285
1286 int lenidx = instr.size()-1;
1287
1288 for (int j=0; j<2; j++)
1289 if (x[3*j+2]==SYMBOL || x[3*j+2]==EXTRASYMBOL) {
1290 instr.push_back(8); // length total
1291 instr.push_back(x[3*j+2]); // (extra)symbol
1292 instr.push_back(4); // length (extra)symbol
1293 instr.push_back(ABS(x[3*j+3])-1); // variable (0-indexed)
1294 instr.push_back(x[3*j+4]); // power
1295 instr.push_back(1); // numerator
1296 instr.push_back(1); // denominator
1297 instr.push_back(3*SGN(x[3*j+3])); // length coeff
1298 instr[lenidx] += 8;
1299 }
1300 else { // x[3*j+1]==SNUMBER
1301 int t = ABS(x[3*j+3])-1;
1302 instr.push_back(tree[t+1]-1); // length number
1303 instr.insert(instr.end(), &tree[t+2], &tree[t]+tree[t+1]); // digits
1304 instr.back() *= SGN(instr.back()) * SGN(x[3*j+3]);
1305 instr[lenidx] += tree[t+1]-1;
1306 }
1307
1308 instr.push_back(0); // trailing 0
1309 instr[lenidx]++;
1310 }
1311
1312 // push new expression on the stack
1313 s.push(1);
1314 s.push(ID[x]);
1315 s.push(EXTRASYMBOL);
1316 s.push(x[0]); // push hash
1317 }
1318 }
1319
1320#ifdef DEBUG
1321 MesPrint ("*** [%s, w=%w] DONE: generate_instructions(cse=%d) : numoper=%d",
1322 thetime_str().c_str(), do_CSE?1:0, count_operators(instr));
1323#endif
1324
1325 return instr;
1326}
1327
1328/*
1329 #] generate_instructions :
1330 #[ count_operators_cse :
1331*/
1332
1333
1341int count_operators_cse (const vector<WORD> &tree) {
1342 //MesPrint ("*** [%s] Starting CSEE", thetime_str().c_str());
1343
1344 typedef unordered_map<vector<WORD>, int, CSEHash, CSEEq> csemap;
1345 csemap ID;
1346
1347 // reserve lots of space, to prevent later rehashes
1348 // TODO: what if this is too large? make a parameter?
1349 ID.rehash(mcts_expr_score * 2);
1350
1351 // s is a stack of operands to process when you encounter operators
1352 // in the postfix expression tree. Operands consist of three WORDs,
1353 // formatted as the LHS/RHS of the keys in ID.
1354 stack<int> s;
1355 int numinstr = 0, numcommas = 0;
1356 vector<WORD> x;
1357
1358 // process the expression tree
1359 for (int i=0; i<(int)tree.size();) {
1360 x.clear();
1361
1362 if (tree[i]==SNUMBER) {
1363 WORD hash = hash_range(&tree[i], tree[i + 1]);
1364 x.push_back(hash);
1365 x.push_back(SNUMBER);
1366 x.insert(x.end(),&tree[i],&tree[i]+tree[i+1]);
1367 int sign = SGN(x.back());
1368 x.back() *= sign;
1369
1370 std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1371 s.push(0);
1372 s.push(sign * suc.first->second);
1373 s.push(SNUMBER);
1374 s.push(hash);
1375 i+=tree[i+1];
1376 }
1377 else if (tree[i]==SYMBOL) {
1378 WORD hash = hash_range(&tree[i], tree[i + 1]);
1379 if (tree[i+3]>1) {
1380 x.push_back(hash);
1381 x.push_back(SYMBOL);
1382 x.push_back(tree[i+2]+1); // variable (1-indexed)
1383 x.push_back(tree[i+3]); // power
1384
1385 csemap::iterator it = ID.find(x);
1386 if (it != ID.end()) {
1387 // already-seen power of a symbol
1388 s.push(1);
1389 s.push(it->second);
1390 s.push(EXTRASYMBOL);
1391 } else {
1392 if (tree[i + 3] == 2)
1393 numinstr++;
1394 else
1395 numinstr += (int)floor(log(tree[i+3])/log(2.0)) + popcount(tree[i+3]) - 1;
1396
1397 ID[x] = numinstr;
1398 s.push(1);
1399 s.push(numinstr);
1400 s.push(EXTRASYMBOL);
1401 }
1402 }
1403 else {
1404 // power of 1
1405 s.push(tree[i+3]); // power
1406 s.push(tree[i+2]+1); // variable (1-indexed)
1407 s.push(SYMBOL);
1408 }
1409
1410 s.push(hash); // push hash
1411
1412 i+=tree[i+1];
1413 }
1414 else { // tree[i]==OPERATOR
1415 int oper = tree[i];
1416 i++;
1417
1418 x.push_back(0); // placeholder for hash
1419 x.push_back(oper);
1420 vector<WORD> hash;
1421 hash.push_back(oper);
1422
1423 // get two operands from the stack
1424 for (int operand=0; operand<2; operand++) {
1425 hash.push_back(s.top()); s.pop();
1426 x.push_back(s.top()); s.pop();
1427 x.push_back(s.top()); s.pop();
1428 x.push_back(s.top()); s.pop();
1429 }
1430
1431
1432 x[0] = hash_range(&hash[0], 3);
1433
1434 // get rid of multiplications by +/-1
1435 if (oper==OPER_MUL) {
1436 bool do_continue = false;
1437
1438 for (int operand=0; operand<2; operand++) {
1439 int idx_oper1 = operand==0 ? 2 : 5;
1440 int idx_oper2 = operand==0 ? 5 : 2;
1441
1442 // check whether operand 1 equals +/-1
1443 if (x[idx_oper1]==SNUMBER) {
1444
1445 int idx = ABS(x[idx_oper1+1])-1;
1446 if (tree[idx+2]==1 && tree[idx+3]==1 && ABS(tree[idx+4])==3) {
1447 // push +/- other operand and continue
1448 s.push(x[idx_oper2+2]);
1449 s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1450 s.push(x[idx_oper2]);
1451 s.push(hash[1 + (operand + 1) % 2]);
1452 do_continue = true;
1453 break;
1454 }
1455 }
1456 }
1457
1458 if (do_continue) continue;
1459 }
1460
1461 // check whether this subexpression has been seen before
1462 // if not, generate instruction to define it
1463 csemap::iterator it = ID.find(x);
1464 if (it == ID.end()) {
1465 if (numinstr == MAXPOSITIVE) {
1466 MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1467 Terminate(-1);
1468 }
1469
1470 if (oper == OPER_COMMA) numcommas++;
1471 ID[x] = ++numinstr;
1472 s.push(1);
1473 s.push(numinstr);
1474 s.push(EXTRASYMBOL);
1475 } else {
1476 // push new expression on the stack
1477 s.push(1);
1478 s.push(it->second);
1479 s.push(EXTRASYMBOL);
1480 }
1481
1482 s.push(x[0]); // push hash
1483 }
1484 }
1485
1486 //MesPrint ("*** [%s] Stopping CSEE", thetime_str().c_str());
1487 return numinstr - numcommas;
1488}
1489
1490/*
1491 #] count_operators_cse :
1492 #[ count_operators_cse_topdown :
1493*/
1494
1495typedef struct node {
1496 const WORD* data;
1497 struct node* l;
1498 struct node* r; // TODO: add l,r to data?
1499 WORD sign; // TODO: use data for this?
1500 UWORD hash;
1501
1502 node() : l(NULL), r(NULL), sign(1), hash(0) {};
1503 node(const WORD* data) : data(data), l(NULL), r(NULL), sign(1), hash(0) {};
1504
1505 // a minus sign in the tree should only count as a different entry if
1506 // it is a compound expression: a = -a, but T+-V != T+V
1507 int cmp(const struct node* rhs) const {
1508 if (this == rhs) return 0;
1509
1510 if (data[0] != rhs->data[0]) return data[0] < rhs->data[0] ? -1 : 1;
1511 int mod = data[0] == SNUMBER ? -1 : 0; // don't check sign, for numbers
1512 if (data[0] == SYMBOL || data[0] == SNUMBER) {
1513 for (int i = 0; i < data[1] + mod; i++) {
1514 if (data[i] != rhs->data[i]) return data[i] < rhs->data[i] ? -1 : 1;
1515 }
1516 } else {
1517 int lv = l->cmp(rhs->l);
1518 if (lv != 0) return lv;
1519 int rv = r->cmp(rhs->r);
1520 if (rv != 0) return rv;
1521
1522 // TODO: only for ADD operation
1523 if (l->sign != rhs->l->sign) return l->sign < rhs->l->sign ? -1 : 1;
1524 if (r->sign != rhs->r->sign) return r->sign < rhs->r->sign ? -1 : 1;
1525 }
1526
1527 return 0;
1528 }
1529
1530 // less than operator
1531 bool operator() (const struct node* lhs, const struct node* rhs) const
1532 {
1533 return lhs->cmp(rhs) < 0;
1534 }
1535
1536 void calcHash() {
1537 int mod = data[0] == SNUMBER ? -1 : 0; // don't check sign, for numbers
1538 if (data[0] == SYMBOL || data[0] == SNUMBER) {
1539 hash = hash_range(data, data[1] + mod);
1540 } else {
1541 if (l->hash == 0) l->calcHash();
1542 if (r->hash == 0) r->calcHash();
1543
1544 // signs only matter for compound expressions
1545 size_t newr[] = {(size_t)data[0], l->hash, (size_t)l->sign, r->hash, (size_t)r->sign};
1546 hash = hash_range(newr, 5);
1547 }
1548 }
1549} NODE;
1550
1551struct NodeHash {
1552 size_t operator()(const NODE* n) const {
1553 return n->hash; // already computed
1554 }
1555};
1556
1557struct NodeEq {
1558 bool operator()(const NODE* lhs, const NODE* rhs) const {
1559 return lhs->cmp(rhs) == 0;
1560 }
1561};
1562
1563NODE* buildTree(vector<WORD> &tree) {
1564 //MesPrint ("*** [%s] Starting CSEE topdown", thetime_str().c_str());
1565
1566 // allocate spaces for the tree, cannot be more nodes than tree size
1567 NODE* ar = (NODE*)Malloc1(tree.size() * sizeof(NODE), "CSE tree");
1568 NODE* c = 0;
1569 unsigned int curIndex = 0;
1570
1571 stack<NODE*> st;
1572 for (int i=0; i<(int)tree.size();) {
1573 c = ar + curIndex;
1574 new (c) NODE(&tree[i]); // placement new
1575 curIndex++;
1576
1577 if (tree[i]==SYMBOL || tree[i] == SNUMBER) {
1578 // extract the sign to a new class member
1579 if (tree[i] == SNUMBER) {
1580 c->sign = SGN(tree[i + tree[i + 1] -1]);
1581 }
1582
1583 c->calcHash();
1584 st.push(c);
1585 i+=tree[i+1];
1586 } else {
1587 c->r = st.top(); st.pop();
1588 c->l = st.top(); st.pop();
1589
1590 // filter *1 and *-1
1591 // TODO: also multiply if there are two numbers?
1592 if (c->data[0] == OPER_MUL) {
1593 NODE* ch[] = {c->r, c->l};
1594 for (int j = 0; j < 2; j++)
1595 if (ch[j]->data[0] == SNUMBER && ch[j]->data[1] == 5 && ch[j]->data[2]==1 && ch[j]->data[3]==1) {
1596 ch[(j+1)%2]->sign *= ch[j]->sign; // transfer sign
1597 c = ch[(j+1)%2];
1598 break;
1599 }
1600 }
1601
1602 c->calcHash();
1603 st.push(c);
1604 i++;
1605 }
1606 }
1607
1608 // TODO: reallocate to smaller size? Could save memory
1609 //MesPrint("Memory difference: %d vs %d", curIndex, tree.size());
1610
1611 // we want to make the root of the tree the first element
1612 // so that we can easily free the array.
1613 // we swap the first element with the root
1614 // we need to change the pointer in the operator node that has this element as a child
1615 // TODO: check performance
1616 for (unsigned int i = 0; i < curIndex; i++) {
1617 if (ar[i].l == ar) ar[i].l = st.top();
1618 if (ar[i].r == ar) ar[i].r = st.top();
1619 }
1620
1621 swap(ar[0], *st.top());
1622 return ar;
1623}
1624
1625int count_operators_cse_topdown (vector<WORD> &tree) {
1626 typedef unordered_set<NODE*, NodeHash, NodeEq> nodeset;
1627 nodeset ID;
1628
1629 // reserve lots of space, to prevent later rehashes
1630 // TODO: what if this is too large? make a parameter?
1631 ID.rehash(mcts_expr_score * 2);
1632
1633 int numinstr = 0;
1634
1635 NODE* root = buildTree(tree);
1636
1637 stack<NODE*> stack;
1638 stack.push(root);
1639 while (!stack.empty())
1640 {
1641 NODE* c = stack.top();
1642 stack.pop();
1643
1644 if (c->data[0] == SYMBOL) {
1645 if (c->data[3] > 1) {
1646 std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1647 if (suc.second) { // new
1648 if (c->data[3] == 2)
1649 numinstr++;
1650 else
1651 numinstr += (int)floor(log(c->data[3])/log(2.0)) + popcount(c->data[3]) - 1;
1652 }
1653 }
1654 } else {
1655 if (c->data[0] != SNUMBER) {
1656 // operator
1657 std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1658 if (suc.second) {
1659 stack.push(c->r);
1660 stack.push(c->l);
1661
1662 // ignore OPER_COMMA
1663 if (c->data[0] == OPER_MUL || c->data[0] == OPER_ADD)
1664 numinstr++;
1665 }
1666 }
1667 }
1668 }
1669
1670 //MesPrint ("*** [%s] Stopping CSEE", thetime_str().c_str());
1671 M_free(root, "CSE tree");
1672
1673 return numinstr;
1674}
1675
1676/*
1677 #] count_operators_cse_topdown :
1678 #[ simulated_annealing :
1679*/
1680vector<WORD> simulated_annealing() {
1681 float minT = AO.Optimize.saMinT.fval;
1682 float maxT = AO.Optimize.saMaxT.fval;
1683 float T = maxT;
1684 float coolrate = pow(minT / maxT, 1 / (float)AO.Optimize.saIter);
1685
1686 GETIDENTITY;
1687
1688 // create a valid state where FACTORSYMBOL/SEPARATESYMBOL remains first
1689 vector<WORD> state = occurrence_order(optimize_expr, false);
1690 int startindex = 0;
1691 if ((state.size() > 0 && state[0] == SEPARATESYMBOL) || (state.size() > 1 && state[1] == FACTORSYMBOL)) startindex++;
1692 if (state.size() > 1 && state[1] == FACTORSYMBOL) startindex++;
1693
1694 my_random_shuffle(BHEAD state.begin() + startindex, state.end()); // start from random scheme
1695
1696 vector<WORD> tree = Horner_tree(optimize_expr, state);
1697 int curscore = count_operators_cse_topdown(tree);
1698
1699 // clean poly_vars, that are allocated by Horner_tree
1700 AN.poly_num_vars = 0;
1701 M_free(AN.poly_vars,"poly_vars");
1702
1703 std::vector<WORD> best = state; // best state
1704 int bestscore = curscore;
1705
1706 if (startindex == (int)state.size() || (int)state.size() - startindex < 2) {
1707 return best;
1708 }
1709
1710 for (int o = 0; o < AO.Optimize.saIter; o++) {
1711 int inda = iranf(BHEAD state.size() - startindex) + startindex;
1712 int indb = iranf(BHEAD state.size() - startindex) + startindex;
1713
1714 if (inda == indb) {
1715 continue;
1716 }
1717
1718 swap(state[inda], state[indb]); // swap works best for Horner
1719
1720 vector<WORD> tree = Horner_tree(optimize_expr, state);
1721 int newscore = count_operators_cse_topdown(tree);
1722
1723 // clean poly_vars, that are allocated by Horner_tree
1724 AN.poly_num_vars = 0;
1725 M_free(AN.poly_vars,"poly_vars");
1726
1727 if (newscore <= curscore || 2.0 * wranf(BHEAD0) / (float)(UWORD)(-1) < exp((curscore - newscore) / T)) {
1728 curscore = newscore;
1729
1730 if (curscore < bestscore) {
1731 bestscore = curscore;
1732 best = state;
1733 }
1734 } else {
1735 swap(state[inda], state[indb]);
1736 }
1737
1738#ifdef DEBUG_SA
1739 MesPrint("Score at step %d: %d", o, curscore);
1740#endif
1741 T *= coolrate;
1742 }
1743
1744#ifdef DEBUG_SA
1745 MesPrint("Simulated annealing score: %d", bestscore);
1746#endif
1747
1748 return best;
1749}
1750
1751/*
1752 #] simulated_annealing :
1753 #[ printpstree :
1754*/
1755
1756/*
1757// print MCTS tree with LaTeX/pstricks (for analysis)
1758void printpstree_rec (tree_node x, string pre="") {
1759
1760 if (x.num_visits==1) {
1761 MesPrint("%s\\TR{%d}",pre.c_str(),x.var);
1762 }
1763 else {
1764 MesPrint("%s\\pstree%s{\\TR{%d}}{",pre.c_str(),
1765 pre==" "?"[nodesep=0, levelsep=40]":"",
1766 x.var);
1767 for (int i=0; i<(int)x.childs.size(); i++)
1768 if (x.childs[i].num_visits>0)
1769 printpstree_rec(x.childs[i], pre+" ");
1770 MesPrint("%s}",pre.c_str());
1771 }
1772}
1773
1774void printpstree () {
1775 // draw tree with pstricks
1776 MesPrint ("\\documentclass{article}");
1777 MesPrint ("\\usepackage{pstricks,pst-node,pst-tree,graphicx}");
1778 MesPrint ("\\begin{document}");
1779 MesPrint ("\\scalebox{0.02}{");
1780 printpstree_rec(mcts_root," ");
1781 MesPrint ("}");
1782 MesPrint ("\\end{document}");
1783}
1784*/
1785
1786/*
1787 #] printpstree :
1788 #[ find_Horner_MCTS_expand_tree :
1789*/
1790
1822/*
1823 #[ next_MCTS_scheme :
1824*/
1825
1826// find a Horner scheme to be used for the next simulation
1827inline static void next_MCTS_scheme (PHEAD vector<WORD> *porder, vector<WORD> *pscheme, vector<tree_node *> *ppath) {
1828
1829 vector<WORD> &order = *porder;
1830 vector<WORD> &schemev = *pscheme;
1831 vector<tree_node *> &path = *ppath;
1832 int depth = 0, nchild0;
1833 float slide_down_factor = 1.0;
1834
1835 order.clear();
1836 path.clear();
1837
1838 // MCTS step I: select
1839 tree_node *select = &mcts_root;
1840 path.push_back(select);
1841 nchild0 = select->childs.size();
1842 while (select->childs.size() > 0) {
1843 // add virtual loss
1844 select->num_visits++;
1845 select->sum_results+=mcts_expr_score;
1846
1847//-------------------------------------------------------------------
1848 switch ( AO.Optimize.mctsdecaymode ) {
1849 case 1: // Based on http://arxiv.org/abs/arXiv:1312.0841
1850 slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1851 break;
1852 case 2: // This gives a bit more cleanup time at the end.
1853 if ( 2*AT.optimtimes < AO.Optimize.mctsnumexpand ) {
1854 slide_down_factor = 1.0*(AO.Optimize.mctsnumexpand-2*AT.optimtimes);
1855 slide_down_factor /= 1.0*AO.Optimize.mctsnumexpand;
1856 }
1857 else {
1858 slide_down_factor = 0.0001;
1859 }
1860 break;
1861 case 3: // depth dependent factor combined with case 1
1862 float dd = 1.0-(1.0*depth)/(1.0*nchild0);
1863 slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1864 if ( dd <= 0.000001 ) slide_down_factor = 1.0;
1865 else slide_down_factor /= dd;
1866 if ( slide_down_factor > 1.0 ) slide_down_factor = 1.0;
1867 break;
1868 }
1869//-------------------------------------------------------------------
1870
1871#ifdef DEBUG_MCTS
1872 MesPrint("select %d",select->var);
1873#endif
1874
1875 // find most-promising node
1876 double best=0;
1877 tree_node *next=NULL;
1878 for (vector<tree_node>::iterator p=select->childs.begin(); p<select->childs.end(); p++) {
1879 double score;
1880 if (p->num_visits >= 1) {
1881
1882 // there are results calculated, so select with the UCT formula
1883 score = mcts_expr_score / (p->sum_results/p->num_visits) +
1884//-------------------------------------------------------------------------
1885 slide_down_factor *
1886//-------------------------------------------------------------------------
1887 2 * AO.Optimize.mctsconstant.fval * sqrt(2*log(select->num_visits) / p->num_visits);
1888
1889#ifdef DEBUG_MCTS
1890 printf("%d: %.2lf [x=%.2lf n=%d fin=%i]\n",p->var,score,mcts_expr_score / (p->sum_results/p->num_visits),
1891 p->num_visits,p->finished?1:0);
1892 fflush(stdout);
1893#endif
1894 }
1895 else {
1896 // no results yet, so select this node by setting score=infinite
1897 score = 1e100;
1898
1899#ifdef DEBUG_MCTS
1900 printf("%d: inf\n",p->var); fflush(stdout);
1901#endif
1902 }
1903
1904 // update best candidate
1905 if (!p->finished && score>best) {
1906 best=score;
1907 next=&*p;
1908 }
1909 }
1910
1911 // if no node is found, this node is finished
1912 if (next==NULL) {
1913 select->finished=true;
1914 break;
1915 }
1916
1917 // traverse down the tree
1918 select = next;
1919 path.push_back(select);
1920 order.push_back(select->var);
1921 depth++;
1922 }
1923
1924 // MCTS step II: expand
1925
1926#ifdef DEBUG_MCTS
1927 MesPrint("expand %d",select->var);
1928#endif
1929
1930 // variables used so far
1931 set<WORD> var_used;
1932
1933 for (int i=0; i<(int)order.size(); i++)
1934 var_used.insert(ABS(order[i])-1);
1935
1936 // if this a new node, create node and add children
1937 if (!select->finished && select->childs.size()==0) {
1938 tree_node new_node(select->var);
1939 int sign = (order.size() == 0) ? 1 : SGN(order.back());
1940 for (int i=0; i<(int)mcts_vars.size(); i++)
1941 if (!var_used.count(mcts_vars[i])) {
1942 new_node.childs.push_back(tree_node(sign*(mcts_vars[i]+1)));
1943 if (AO.Optimize.hornerdirection==O_FORWARDANDBACKWARD)
1944 new_node.childs.push_back(tree_node(-sign*(mcts_vars[i]+1)));
1945 }
1946 my_random_shuffle(BHEAD new_node.childs.begin(), new_node.childs.end());
1947
1948 // here locking is necessary, since operator=(tree_node) is a
1949 // non-atomic operation (using pointers makes this lock obsolete)
1950 LOCK(optimize_lock);
1951 *select = new_node;
1952 UNLOCK(optimize_lock);
1953 }
1954 // set finished if necessary
1955 if (select->childs.size()==0)
1956 select->finished = true;
1957
1958 // add virtual loss of number of operators in original expression
1959 select->num_visits++;
1960 select->sum_results+=mcts_expr_score;
1961
1962 // MCTS step III: simulation
1963
1964 // create complete Horner scheme
1965 deque<WORD> scheme;
1966
1967 for (int i=0; i<(int)mcts_vars.size(); i++)
1968 if (!var_used.count(mcts_vars[i]))
1969 scheme.push_back(mcts_vars[i]);
1970 my_random_shuffle(BHEAD scheme.begin(), scheme.end());
1971
1972 for (int i=(int)order.size()-1; i>=0; i--) {
1973 if (order[i] > 0)
1974 scheme.push_front(order[i]-1);
1975 else
1976 scheme.push_back(-order[i]-1);
1977 }
1978
1979 // add FACTORSYMBOL/SEPARATESYMBOL is necessary
1980 if (mcts_factorized)
1981 scheme.push_front(FACTORSYMBOL);
1982 if (mcts_separated)
1983 scheme.push_front(SEPARATESYMBOL);
1984
1985 // Horner scheme as a vector
1986 schemev = vector<WORD>(scheme.begin(), scheme.end());
1987
1988}
1989
1990/*
1991 #] next_MCTS_scheme :
1992 #[ try_MCTS_scheme :
1993*/
1994
1995// count the number of operators in the given Horner scheme
1996inline static void try_MCTS_scheme (PHEAD const vector<WORD> &scheme, int *pnum_oper) {
1997
1998 // do Horner, CSE and count the number of operators
1999 vector<WORD> tree = Horner_tree(optimize_expr, scheme);
2000 //vector<WORD> instr = generate_instructions(tree, true);
2001 //int num_oper = count_operators(instr);
2002 //int num_oper2 = count_operators_cse(tree);
2003 //int num_oper2 = count_operators_cse_topdown(tree);
2004 //MesPrint("%d %d", num_oper, num_oper2);
2005
2006 int num_oper = count_operators_cse_topdown(tree);
2007
2008 // clean poly_vars, that is allocated by Horner_tree
2009 AN.poly_num_vars = 0;
2010 M_free(AN.poly_vars,"poly_vars");
2011
2012 *pnum_oper = num_oper;
2013
2014}
2015
2016/*
2017 #] try_MCTS_scheme :
2018 #[ update_MCTS_scheme :
2019*/
2020
2021// update the best score and the search tree
2022inline static void update_MCTS_scheme (int num_oper, const vector<WORD> &scheme, vector<tree_node *> *ppath) {
2023
2024 vector<tree_node *> &path = *ppath;
2025
2026 // update the (global) list of best Horner scheme
2027 if ((int)mcts_best_schemes.size() < AO.Optimize.mctsnumkeep ||
2028 (--mcts_best_schemes.end())->first > num_oper) {
2029 // here locking is necessary, for otherwise best_schemes may
2030 // become corrupted; lock can be prevented if each thread keeps
2031 // track of it's own list and those lists are merged in the end,
2032 // but this seems not useful to implement
2033 LOCK(optimize_lock);
2034 mcts_best_schemes.insert(make_pair(num_oper,scheme));
2035 if ((int)mcts_best_schemes.size() > AO.Optimize.mctsnumkeep)
2036 mcts_best_schemes.erase(--mcts_best_schemes.end());
2037 UNLOCK(optimize_lock);
2038 }
2039
2040 // MCTS step IV: backpropagate
2041
2042 // add number of operator and subtract mcts_expr_score, which
2043 // behaves as a "virtual loss"
2044 for (vector<tree_node *>::iterator p=path.begin(); p<path.end(); p++)
2045 (*p)->sum_results += num_oper - mcts_expr_score;
2046
2047}
2048
2049/*
2050 #] update_MCTS_scheme :
2051*/
2052
2053void find_Horner_MCTS_expand_tree () {
2054
2055#ifdef DEBUG
2056 MesPrint ("*** [%s, w=%w] CALL: find_Horner_MCTS_expand_tree", thetime_str().c_str());
2057#endif
2058
2059 GETIDENTITY;
2060
2061 // the order for the Horner scheme up to the selected node, with signs
2062 // indicating forward or backward.
2063 vector<WORD> order;
2064
2065 // complete Horner scheme
2066 vector<WORD> scheme;
2067
2068 // path to the selected node
2069 vector<tree_node *> path;
2070
2071 // the number of operations obtained by the simulation
2072 int num_oper;
2073
2074 next_MCTS_scheme(BHEAD &order, &scheme, &path);
2075 try_MCTS_scheme(BHEAD scheme, &num_oper);
2076#ifdef DEBUG_MCTS
2077 // Actually "order" is needed only for this debug output.
2078 MesPrint ("{%a} -> {%a} -> %d", order.size(), &order[0], scheme.size(), &scheme[0], num_oper);
2079#endif
2080 update_MCTS_scheme(num_oper, scheme, &path);
2081
2082#ifdef DEBUG
2083 MesPrint ("*** [%s, w=%w] DONE: find_Horner_MCTS_expand_tree(%a-> %d)",
2084 thetime_str().c_str(), scheme.size(), &scheme[0], num_oper);
2085#endif
2086
2087}
2088
2089/*
2090 #] find_Horner_MCTS_expand_tree :
2091 #[ PF_find_Horner_MCTS_expand_tree :
2092*/
2093#ifdef WITHMPI
2094
2095// To remember which task is assigned to each slave. A task is represented by
2096// a pair of a Horner scheme and the selected path for the scheme.
2097// The index range is from 1 to PF.numtasks-1.
2098vector<pair<vector<WORD>, vector<tree_node *> > > PF_opt_MCTS_tasks;
2099
2100// Initialization.
2101void PF_find_Horner_MCTS_expand_tree_master_init () {
2102
2103 PF_opt_MCTS_tasks.resize(PF.numtasks);
2104 for (int i = 1; i < PF.numtasks; i++) {
2105 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[i];
2106 p.first.clear();
2107 p.second.clear();
2108 }
2109
2110}
2111
2112// Wait for an idle slave and return the process number.
2113int PF_find_Horner_MCTS_expand_tree_master_next () {
2114
2115 // Find an idle slave.
2116 int next;
2117 PF_Receive(PF_ANY_SOURCE, PF_OPT_MCTS_MSGTAG, &next, NULL);
2118
2119 // Check if the slave had a task.
2120 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2121 if (!p.first.empty()) {
2122 // If so, update the result.
2123 int num_oper;
2124 PF_Unpack(&num_oper, 1, PF_INT);
2125 update_MCTS_scheme(num_oper, p.first, &p.second);
2126
2127 // Clear the task.
2128 p.first.clear();
2129 p.second.clear();
2130 }
2131
2132 return next;
2133
2134}
2135
2136// The main function on the master.
2137void PF_find_Horner_MCTS_expand_tree_master () {
2138
2139#ifdef DEBUG
2140 MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2141#endif
2142
2143 vector<WORD> order;
2144 vector<WORD> scheme;
2145 vector<tree_node *> path;
2146
2147 next_MCTS_scheme(BHEAD &order, &scheme, &path);
2148
2149 // Find an idle slave.
2150 int next = PF_find_Horner_MCTS_expand_tree_master_next();
2151
2152 // Send a new task to the slave.
2154 int len = scheme.size();
2155 PF_LongSinglePack(&len, 1, PF_INT);
2156 PF_LongSinglePack(&scheme[0], len, PF_WORD);
2157 PF_LongSingleSend(next, PF_OPT_MCTS_MSGTAG);
2158
2159 // Remember the task.
2160 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2161 p.first = scheme;
2162 p.second = path;
2163
2164#ifdef DEBUG
2165 MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2166#endif
2167
2168}
2169
2170// Wait for all the slaves to finish their tasks.
2171void PF_find_Horner_MCTS_expand_tree_master_wait () {
2172
2173#ifdef DEBUG
2174 MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2175#endif
2176
2177 // Wait for all the slaves.
2178 for (int i = 1; i < PF.numtasks; i++) {
2179 int next = PF_find_Horner_MCTS_expand_tree_master_next();
2180 // Send a null task.
2182 int len = 0;
2183 PF_LongSinglePack(&len, 1, PF_INT);
2184 PF_LongSingleSend(next, PF_OPT_MCTS_MSGTAG);
2185 }
2186
2187#ifdef DEBUG
2188 MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2189#endif
2190
2191}
2192
2193// The main function on the slaves.
2194void PF_find_Horner_MCTS_expand_tree_slave () {
2195
2196#ifdef DEBUG
2197 MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2198#endif
2199
2200 vector<WORD> scheme;
2201
2202 {
2203 // Send the first message to the master, which indicates I am idle.
2205 int dummy = 0;
2206 PF_Pack(&dummy, 1, PF_INT);
2207 PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2208 }
2209
2210 for (;;) {
2211 // Get a task from the master.
2212 PF_LongSingleReceive(MASTER, PF_OPT_MCTS_MSGTAG, NULL, NULL);
2213
2214 // Length of the task.
2215 int len;
2216 PF_LongSingleUnpack(&len, 1, PF_INT);
2217
2218 // No task remains.
2219 if (len == 0) break;
2220
2221 // Perform the given task.
2222 scheme.resize(len);
2223 PF_LongSingleUnpack(&scheme[0], len, PF_WORD);
2224 int num_oper;
2225 try_MCTS_scheme(scheme, &num_oper);
2226
2227 // Send the result to the master.
2229 PF_Pack(&num_oper, 1, PF_INT);
2230 PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2231 }
2232
2233#ifdef DEBUG
2234 MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2235#endif
2236
2237}
2238
2239#endif
2240/*
2241 #] PF_find_Horner_MCTS_expand_tree :
2242 #[ find_Horner_MCTS :
2243*/
2244
2253//vector<vector<WORD> > find_Horner_MCTS () {
2255
2256#ifdef WITHMPI
2257 if (PF.me != MASTER) {
2258 if (PF.numtasks <= 1) return;
2259 PF_find_Horner_MCTS_expand_tree_slave();
2260 return;
2261 }
2262#endif
2263
2264#ifdef DEBUG
2265 MesPrint ("*** [%s, w=%w] CALL: find_Horner_MCTS", thetime_str().c_str());
2266#endif
2267
2268 GETIDENTITY;
2269
2270 LONG start_time = TimeWallClock(1);
2271
2272 // initialize the used global variables
2273 mcts_expr_score = count_operators(optimize_expr);
2274 mcts_root = tree_node();
2275
2276 // extract all symbols from the expression
2277 set<WORD> var_set;
2278 for (WORD *t=optimize_expr; *t!=0; t+=*t)
2279 if (t[1] == SYMBOL)
2280 for (int i=3; i<t[2]; i+=2)
2281 var_set.insert(t[i]);
2282
2283 // check for factorized/separated expression and make sure that
2284 // FACTORSYMBOL/SEPARATESYMBOL isn't included in the MCTS
2285 mcts_factorized = var_set.count(FACTORSYMBOL);
2286 if (mcts_factorized) var_set.erase(FACTORSYMBOL);
2287 mcts_separated = var_set.count(SEPARATESYMBOL);
2288 if (mcts_separated) var_set.erase(SEPARATESYMBOL);
2289
2290 mcts_vars = vector<WORD>(var_set.begin(), var_set.end());
2291 optimize_num_vars = (int)mcts_vars.size();
2292 // initialize MCTS tree root
2293 for (int i=0; i<(int)mcts_vars.size(); i++) {
2294 if (AO.Optimize.hornerdirection != O_BACKWARD)
2295 mcts_root.childs.push_back(tree_node(+(mcts_vars[i]+1)));
2296 if (AO.Optimize.hornerdirection != O_FORWARD)
2297 mcts_root.childs.push_back(tree_node(-(mcts_vars[i]+1)));
2298 }
2299 my_random_shuffle(BHEAD mcts_root.childs.begin(), mcts_root.childs.end());
2300
2301#if defined(WITHMPI)
2302 PF_find_Horner_MCTS_expand_tree_master_init();
2303#endif
2304
2305 // initialize a potential variable mctsconstant scheme.
2306 AT.optimtimes = 0;
2307
2308 // call expand_tree until it is called "mctsnumexpand" times, the
2309 // time limit is reached or the tree is fully finished
2310 for (int times=0; times<AO.Optimize.mctsnumexpand && !mcts_root.finished &&
2311 (AO.Optimize.mctstimelimit==0 || (TimeWallClock(1)-start_time)/100 < AO.Optimize.mctstimelimit);
2312 times++) {
2313 AT.optimtimes = times;
2314 // call expand_tree routine depending on threading mode
2315#if defined(WITHPTHREADS)
2316 if (AM.totalnumberofthreads > 1)
2317 find_Horner_MCTS_expand_tree_threaded();
2318 else
2319#elif defined(WITHMPI)
2320 if (PF.numtasks > 1)
2321 PF_find_Horner_MCTS_expand_tree_master();
2322 else
2323#endif
2324 find_Horner_MCTS_expand_tree();
2325 }
2326
2327 // if TForm or ParForm, wait for everyone to finish
2328#ifdef WITHPTHREADS
2329 MasterWaitAll();
2330#endif
2331#ifdef WITHMPI
2332 PF_find_Horner_MCTS_expand_tree_master_wait();
2333#endif
2334#ifdef DEBUG
2335 MesPrint ("*** [%s, w=%w] DONE: find_Horner_MCTS", thetime_str().c_str());
2336#endif
2337
2338}
2339
2340/*
2341 #] find_Horner_MCTS :
2342 #[ merge_operators :
2343*/
2344
2402vector<WORD> merge_operators (const vector<WORD> &all_instr, bool move_coeff) {
2403
2404#ifdef DEBUG_MORE
2405 MesPrint ("*** [%s, w=%w] CALL: merge_operators", thetime_str().c_str());
2406#endif
2407
2408 // get starting positions of instructions
2409 vector<const WORD *> instr;
2410 const WORD *tbegin = &*all_instr.begin();
2411 const WORD *tend = tbegin+all_instr.size();
2412 // copy all instructions to temp space. There will be n of them in instr.
2413 for (const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
2414 instr.push_back(t);
2415 }
2416 int n = instr.size();
2417
2418 // find parents and number of parents of instructions
2419 vector<int> par(n), numpar(n,0);
2420 for (int i=0; i<n; i++) par[i]=i;
2421
2422 for (int i=0; i<n; i++) {
2423 for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) {
2424 if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) ) {
2425 // extra symbol t[3] is referred to in instr i.
2426 par[*(t+3)]=i;
2427 numpar[*(t+3)]++;
2428
2429 // if coefficient isn't +/-1 or power > 1, increase numpar,
2430 // so that this is not merged
2431 if (*(t+*t-3)!=1 || *(t+*t-2)!=1 || ABS(*(t+*t-1))!=3) numpar[*(t+3)]++;
2432 if (*(t+4)>1) numpar[*(t+3)]++;
2433 }
2434 }
2435 }
2436 // determine which instructions to merge
2437 stack<int> s;
2438 s.push(n-1);
2439 vector<bool> vis(n,false);
2440
2441 while (!s.empty()) {
2442
2443 int i=s.top(); s.pop();
2444 if (vis[i]) continue;
2445 vis[i]=true;
2446
2447 for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t)
2448 if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) )
2449 s.push(*(t+3));
2450
2451 // condition: one parent and equal operator as parent
2452 if (numpar[i]==1 && *(instr[i]+1)==*(instr[par[i]]+1))
2453 par[i] = par[par[i]]; // The expr into which we subst par[i] to get i
2454 else
2455 par[i] = i;
2456 }
2457
2458 // merge instructions into new instructions
2459 vector<WORD> newinstr;
2460
2461 // stack of new expressions, all 0-indexed
2462 stack<WORD> new_expr;
2463 new_expr.push(n-1);
2464 vis = vector<bool>(n,false);
2465
2466 // skip empty equations (might be introduced by greedy optimizations)
2467 vector<bool> skip(n,false), skipcoeff(n,false);
2468 for (int i=0; i<n; i++)
2469 if (*(instr[i]+OPTHEAD) == 0) skip[i]=skipcoeff[i]=true;
2470
2471 // for renumbering merged parents
2472 vector<int> renum_par(n);
2473 for (int i=0; i<n; i++)
2474 renum_par[i]=i;
2475
2476 while (!new_expr.empty()) {
2477 int x = new_expr.top(); new_expr.pop();
2478 if (vis[x]) continue;
2479 vis[x] = true;
2480
2481 // find all instructions with parent=x and copy their arguments
2482 // into a new expression
2483 bool first_copy=true;
2484 int lenidx = newinstr.size()+2;
2485
2486 // 1-indexed, since signs may occur
2487 stack<WORD> this_expr;
2488 this_expr.push(x+1);
2489
2490 while (!this_expr.empty()) {
2491 // pop from stack, determine expr.nr and sign
2492 int i = this_expr.top(); this_expr.pop();
2493 int sign = SGN(i);
2494 i = ABS(i)-1;
2495 for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) { // terms in i
2496 // don't copy a term if:
2497 // (1) skip=true, since then it's merged into the parent
2498 // (2) extrasymbol with parent=x, because its children should be copied
2499 // (3) coefficient with skipcoeff=true, since it's already copied
2500 bool copy = !skip[i];
2501 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
2502 if (par[*(t+3)] == x) {
2503 // parent of term refers to x. we push it with its sign if no skip is true
2504 // and the sign of the expr.
2505 this_expr.push(sign * (skip[i]||skipcoeff[i] ? 1 : SGN(*(t+*t-1))) * (1+*(t+3)));
2506 if (*(instr[i]+1) == OPER_MUL) sign=1;
2507 copy=false;
2508 }
2509 else {
2510 new_expr.push(*(t+3));
2511 }
2512 }
2513
2514 if (*t == 1+ABS(*(t+*t-1)) && skipcoeff[i]) {
2515 copy=false;
2516 }
2517
2518 if (copy) {
2519 // first term, so add header
2520 if (first_copy) {
2521 newinstr.push_back(renum_par[x]); // expr.nr.
2522 newinstr.push_back(*(instr[x]+1)); // operator
2523 newinstr.push_back(3); // length OPTHEAD?
2524 first_copy=false;
2525 }
2526
2527 // copy term and adjust sign
2528 int thislenidx = newinstr.size();
2529 newinstr.insert(newinstr.end(), t, t+*t); // Put the whole term in newinstr
2530 newinstr.back() *= sign;
2531 if (*(instr[i]+1) == OPER_MUL) sign=1;
2532 newinstr[lenidx] += *t;
2533
2534 // check for moving coefficients up
2535 // necessary condition: MUL-expression with 1 parent
2536 if (move_coeff && *t!=1+ABS(*(t+*t-1)) && *(instr[i]+1)!=OPER_COMMA &&
2537 *(t+1)==EXTRASYMBOL && numpar[*(t+3)]==1 && *(instr[*(t+3)]+1)==OPER_MUL) {
2538
2539 // coefficient is always the first term (that's how Horner+generate works)
2540 const WORD *t1 = instr[*(t+3)]+OPTHEAD;
2541 const WORD *t2 = t1+*t1;
2542
2543 if (*t1 == 1+ABS(*(t1+*t1-1))) {
2544 // t1 pointer to a coefficient, so move it
2545
2546 // remove old coefficient of 1
2547 WORD *t3 = &*newinstr.end(); //
2548 int sign2 = SGN(t3[-1]); //
2549 newinstr.erase(newinstr.end()-3, newinstr.end());
2550 // count number of arguments; iff it is 2 move the (extra)symbol too
2551 int numargs=0;
2552 for (const WORD *tt=t1; *tt!=0; tt+=*tt) {
2553 numargs++;
2554 }
2555 if (numargs==2 && *(t2+4)==1) {
2556 // replace (extra)symbol
2557 newinstr[newinstr.size()-4] = *(t2+1);
2558 newinstr[newinstr.size()-3] = *(t2+2);
2559 newinstr[newinstr.size()-2] = *(t2+3);
2560 newinstr[newinstr.size()-1] = *(t2+4);
2561 sign2 *= SGN(*(t2+*t2-1)); // was t2[7]
2562
2563 // ignore this expression from now on
2564 skip[*(t+3)]=true;
2565 if (*(t2+1)==EXTRASYMBOL)
2566 renum_par[*(t+3)] = *(t2+3);
2567 }
2568 else {
2569 // otherwise, ignore coefficient from now on
2570 // we need to collect the signs of the terms
2571 // first and set them to one. This was forgotten
2572 // before. Gave occasional errors.
2573 if ( numargs > 2 || ( numargs == 2 && t2[4] > 1 ) ) {
2574 for (WORD *tt=(WORD *)t2; *tt!=0; tt+=*tt) {
2575 if ( tt[*tt-1] < 0 ) {
2576 tt[*tt-1] = -tt[*tt-1];
2577 sign2 = -sign2;
2578 }
2579 }
2580 }
2581 skipcoeff[*(t+3)]=true;
2582 }
2583
2584 // add new coefficient
2585 newinstr.insert(newinstr.end(), t1+1, t1+*t1);
2586 newinstr.back() *= sign2;
2587 newinstr[thislenidx] += ABS(newinstr.back()) - 3;
2588 newinstr[lenidx] += ABS(newinstr.back()) - 3;
2589 }
2590 }
2591 }
2592 }
2593 }
2594
2595 // if something has been copied, add trailing zero
2596 if (!first_copy) {
2597 newinstr.push_back(0);
2598 newinstr[lenidx]++;
2599 }
2600 }
2601
2602 // renumber the expressions to 0,1,2,..,; only keep expressions with
2603 // skip=false which are their own parent after a renumbering in case
2604 // of moved coefficients
2605
2606 // find renumber scheme
2607 vector<int> renum(n,-1);
2608 int next=0;
2609 for (int i=0; i<n; i++)
2610 if (!skip[i] && renum_par[par[i]]==i) renum[renum_par[i]]=next++;
2611
2612 // find new instruction index
2613 tbegin = &*newinstr.begin();
2614 tend = tbegin+newinstr.size();
2615 for (const WORD *t=tbegin; t!=tend; t+=*(t+2))
2616 instr[renum[*t]] = t;
2617
2618 // renumbering expressions and sort them lexicographically (in
2619 // new_instr they are in the preorder of the expression tree)
2620 vector<WORD> sortinstr;
2621 for (int i=0; i<next; i++) {
2622 int idx = sortinstr.size();
2623 sortinstr.insert(sortinstr.end(), instr[i], instr[i]+*(instr[i]+2));
2624
2625 sortinstr[idx] = renum[sortinstr[idx]];
2626
2627 // renumber content of an expression
2628 for (WORD *t2=&sortinstr[idx]+3; *t2!=0; t2+=*t2)
2629 if (*t2!=1+ABS(*(t2+*t2-1)) && *(t2+1)==EXTRASYMBOL)
2630 *(t2+3) = renum[*(t2+3)];
2631 }
2632
2633#ifdef DEBUG_MORE
2634 MesPrint ("*** [%s, w=%w] DONE: merge_operators", thetime_str().c_str());
2635#endif
2636
2637 return sortinstr;
2638}
2639
2640/*
2641 #] merge_operators :
2642 #[ class Optimization :
2643*/
2644
2670public:
2671 int type, arg1, arg2, improve;
2672 vector<WORD> coeff;
2673 vector<int> eqnidxs;
2674
2675 bool operator< (const optimization &a) const {
2676 if (arg1 != a.arg1) return arg1 < a.arg1;
2677 if (arg2 != a.arg2) return arg2 < a.arg2;
2678 if (type != a.type) return type < a.type;
2679 return coeff < a.coeff;
2680 }
2681};
2682
2683/*
2684 #] class Optimization :
2685 #[ find_optimizations :
2686*/
2687
2697vector<optimization> find_optimizations (const vector<WORD> &instr) {
2698
2699#ifdef DEBUG_GREEDY
2700 MesPrint ("*** [%s, w=%w] CALL: find_optimizations", thetime_str().c_str());
2701#endif
2702
2703// #[ Startup :
2704 // the resulting vector of optimizations
2705 vector<optimization> res;
2706
2707 // a map to count the improvement of an optimization; the
2708 // improvement is stored as a vector<int> with equation numbers
2709 map<optimization, vector<int> > cnt;
2710
2711 // a map to identify coefficients
2712 map<vector<int>,int> idx_coeff;
2713
2714 const WORD *ebegin = &*instr.begin();
2715 const WORD *eend = ebegin+instr.size();
2716
2717 for (int optim_type=0; optim_type<=4; optim_type++) {
2718
2719 cnt.clear();
2720 idx_coeff.clear();
2721
2722 optimization optim;
2723 optim.type = optim_type;
2724// #] Startup :
2725
2726// #[ type 0 : find optimizations of the form z=x^n (optim.type==0)
2727 if (optim_type == 0) {
2728 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2729 if (*(e+1) != OPER_MUL) continue;
2730 for (const WORD *t=e+3; *t!=0; t+=*t) {
2731 if (*t == ABS(*(t+*t-1))+1) continue;
2732 if (*(t+4) > 1) {
2733 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2734 optim.arg2 = *(t+4);
2735 cnt[optim].push_back(e-ebegin);
2736 }
2737 }
2738 }
2739 }
2740// #] type 0 :
2741// #[ type 1 : find optimizations of the form z=x*y (optim.type==1)
2742 if (optim_type == 1) {
2743 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2744 if (*(e+1) != OPER_MUL) continue;
2745
2746 for (const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2747 if (*t1 == ABS(*(t1+*t1-1))+1) continue;
2748 int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2749
2750 for (const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2751 if (*t2 == ABS(*(t2+*t2-1))+1) continue;
2752 int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2753
2754 if (*(t1+4) == *(t2+4)) {
2755 optim.arg1 = x1;
2756 optim.arg2 = x2;
2757 if (optim.arg1 > optim.arg2)
2758 swap(optim.arg1, optim.arg2);
2759
2760 if (*(t1+4) == 1)
2761 cnt[optim].push_back(e-ebegin);
2762 else {
2763 // E=x^n*y^n -> z=x*y; E=z^n is double improvement
2764 cnt[optim].push_back(e-ebegin);
2765 cnt[optim].push_back(e-ebegin);
2766 }
2767 }
2768 }
2769 }
2770 }
2771 }
2772// #] type 1 :
2773// #[ type 2 : find optimizations of the form z=c*x (optim.type==2)
2774 if (optim_type == 2) {
2775 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2776
2777 if (*(e+1) == OPER_ADD) {
2778 // in ADD-equation
2779
2780 for (const WORD *t=e+3; *t!=0; t+=*t) {
2781 if (*t == ABS(*(t+*t-1))+1) continue;
2782
2783 if (*(t+4)==1) {
2784 if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1) continue;
2785
2786 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2787 optim.coeff.back() = ABS(optim.coeff.back());
2788
2789 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2790 optim.arg2 = 0;
2791
2792 cnt[optim].push_back(e-ebegin);
2793 }
2794 }
2795 }
2796 else if (*(e+1) == OPER_MUL) {
2797 // in MUL-equation
2798 optim.coeff.clear();
2799
2800 for (const WORD *t=e+3; *t!=0; t+=*t)
2801 if (*t == ABS(*(t+*t-1))+1) {
2802 if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1) continue;
2803 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2804 optim.coeff.back() = ABS(optim.coeff.back());
2805 }
2806
2807 if (!optim.coeff.empty())
2808 for (const WORD *t=e+3; *t!=0; t+=*t) {
2809 if (*t == ABS(*(t+*t-1))+1) continue;
2810 if (*(t+4) != 1) continue;
2811 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2812 optim.arg2 = 0;
2813 cnt[optim].push_back(e-ebegin);
2814 }
2815 }
2816 }
2817 }
2818// #] type 2 :
2819// #[ type 3 : find optimizations of the form z=x+c (optim.type==3)
2820 if (optim_type == 3) {
2821 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2822
2823 if (*(e+1) != OPER_ADD) continue;
2824
2825 optim.coeff.clear();
2826
2827 for (const WORD *t=e+3; *t!=0; t+=*t)
2828 if (*t == ABS(*(t+*t-1))+1) {
2829 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2830 }
2831
2832 if (!optim.coeff.empty()) {
2833 for (const WORD *t=e+3; *t!=0; t+=*t) {
2834 if (*t == ABS(*(t+*t-1))+1) continue;
2835 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) continue;
2836 if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2837
2838 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2839 optim.arg2 = 0;
2840 cnt[optim].push_back(e-ebegin);
2841 if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2842 }
2843 }
2844 }
2845 }
2846// #] type 3 :
2847// #[ type 4,5 : find optimizations of the form z=x+y or z=x-y (optim.type==4 or 5)
2848 if (optim_type == 4) {
2849 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2850 if (*(e+1) != OPER_ADD) continue;
2851
2852 for (const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2853 if (*t1 == ABS(*(t1+*t1-1))+1) continue;
2854 int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2855
2856 for (const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2857 if (*t2 == ABS(*(t2+*t2-1))+1) continue;
2858 int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2859
2860 int sign1 = SGN(*(t1+*t1-1));
2861 int sign2 = SGN(*(t2+*t2-1));
2862
2863 if (BigLong((UWORD *)t1+5, ABS(*(t1+*t1-1))-1, (UWORD *)t2+5, ABS(*(t2+*t2-1))-1) == 0) {
2864
2865 optim.type = (sign1 * sign2 == 1 ? 4 : 5); // optimization type
2866 optim.arg1 = x1;
2867 optim.arg2 = x2;
2868 if (optim.arg1 > optim.arg2) {
2869 swap(optim.arg1, optim.arg2);
2870 }
2871
2872 if (ABS(*(t1+*t1-1))==3 && *(t1+*t1-2)==1 && *(t1+*t1-3)==1)
2873 cnt[optim].push_back(e-ebegin);
2874 else {
2875 // E=2x+2y -> z=x+y; E=2z is improvement bby itself
2876 cnt[optim].push_back(e-ebegin);
2877 cnt[optim].push_back(e-ebegin);
2878 }
2879 }
2880 }
2881 }
2882 }
2883 }
2884// #] type 4,5 :
2885// #[ add :
2886
2887 // add optimizations with positive improvement to the result
2888 for (map<optimization, vector<int> >::iterator i=cnt.begin(); i!=cnt.end(); i++) {
2889 int improve = i->second.size() - 1;
2890 if (improve > 0) {
2891 res.push_back(i->first);
2892 res.back().improve = improve;
2893 res.back().eqnidxs = i->second;
2894
2895 // remove duplicates, that were add to get the correct improvement
2896 res.back().eqnidxs.erase(unique(res.back().eqnidxs.begin(), res.back().eqnidxs.end()), res.back().eqnidxs.end());
2897 }
2898 }
2899 }
2900// #] add :
2901
2902#ifdef DEBUG_GREEDY
2903 MesPrint ("*** [%s, w=%w] DONE: find_optimizations",thetime_str().c_str());
2904#endif
2905
2906 return res;
2907}
2908
2909/*
2910 #] find_optimizations :
2911 #[ do_optimization :
2912*/
2913
2930bool do_optimization (const optimization optim, vector<WORD> &instr, int newid) {
2931
2932// #[ Debug code :
2933#ifdef DEBUG_GREEDY
2934 if (optim.type==0)
2935 MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d^%d)",
2936 thetime_str().c_str(), optim.improve,
2937 optim.arg1>0?'x':'Z', ABS(optim.arg1)-1, optim.arg2);
2938 else if (optim.type==1 || optim.type>=4)
2939 MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%c%d)",
2940 thetime_str().c_str(), optim.improve,
2941 optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
2942 optim.type==1 ? '*' : optim.type==4 ? '+' : '-',
2943 optim.arg2>0?'x':'Z', ABS(optim.arg2)-1);
2944 else {
2945 WORD n = optim.coeff.back()/2;
2946 UBYTE num[BITSINWORD*ABS(n)], den[BITSINWORD*ABS(n)];
2947 PrtLong((UWORD *)&optim.coeff[0], n, num);
2948 PrtLong((UWORD *)&optim.coeff[ABS(n)], ABS(n), den);
2949 MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%s/%s)",
2950 thetime_str().c_str(), optim.improve,
2951 optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
2952 optim.type==2 ? '*' : '+', num,den);
2953 }
2954#endif
2955// #] Debug code :
2956
2957 bool substituted = false;
2958 WORD *ebegin = &*instr.begin();
2959
2960// #[ type 0 : substitution of the form z=x^n (optim.type==0)
2961 if (optim.type == 0) {
2962
2963 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
2964 int varnumx = ABS(optim.arg1) - 1;
2965 int n = optim.arg2;
2966
2967 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
2968 WORD *e = ebegin + optim.eqnidxs[i];
2969 if (*(e+1) != OPER_MUL) continue;
2970
2971 // scan through equation
2972 for (WORD *t=e+3; *t!=0; t+=*t) {
2973 if (*t == ABS(*(t+*t-1))+1) continue;
2974 if (*(t+1)==vartypex &&
2975 *(t+3)==varnumx &&
2976 *(t+4) % n == 0) {
2977
2978 // substitute
2979 *(t+1) = EXTRASYMBOL;
2980 *(t+3) = newid;
2981 *(t+4) /= n;
2982
2983 substituted = true;
2984 }
2985 }
2986 }
2987
2988 if (!substituted) {
2989#ifdef DEBUG_GREEDY
2990 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
2991#endif
2992 return false;
2993 }
2994
2995 // add extra equation (Tnew = x^n)
2996 instr.push_back(newid); // eqn.nr
2997 instr.push_back(OPER_MUL); // operator
2998 instr.push_back(12); // total length
2999 instr.push_back(8); // term length
3000 instr.push_back(vartypex); // (extra)symbol
3001 instr.push_back(4); // symbol length
3002 instr.push_back(varnumx); // symbol id
3003 instr.push_back(n); // power
3004 instr.push_back(1);
3005 instr.push_back(1); // coefficient 1
3006 instr.push_back(3);
3007 instr.push_back(0); // trailing 0
3008 }
3009// #] type 0 :
3010// #[ type 1 : substitution of the form z=x*y (optim.type==1)
3011 if (optim.type == 1) {
3012
3013 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3014 int varnumx = ABS(optim.arg1) - 1;
3015 int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
3016 int varnumy = ABS(optim.arg2) - 1;
3017
3018 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3019 WORD *e = ebegin + optim.eqnidxs[i];
3020 if (*(e+1) != OPER_MUL) continue;
3021
3022 // scan through equation
3023 int powx=0, powy=0;
3024 for (WORD *t=e+3; *t!=0; t+=*t) {
3025 if (*t == ABS(*(t+*t-1))+1) continue;
3026 if (*(t+1)==vartypex && *(t+3)==varnumx) powx = *(t+4);
3027 if (*(t+1)==vartypey && *(t+3)==varnumy) powy = *(t+4);
3028 }
3029
3030 // substitute if found
3031 if (powx>0 && powy>0 && powx==powy) {
3032
3033 WORD sign = 1;
3034 WORD *newt = e+3;
3035
3036 for (WORD *t=e+3; *t!=0;) {
3037 int dt=*t;
3038
3039 if (*t == ABS(*(t+*t-1))+1 ||
3040 (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
3041 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
3042 memmove(newt, t, *t*sizeof(WORD));
3043 newt += dt;
3044 }
3045 else {
3046 sign *= SGN(*(t+*t-1));
3047 }
3048
3049 t+=dt;
3050 }
3051
3052 *newt++ = 8; // term length
3053 *newt++ = EXTRASYMBOL; // extrasymbol
3054 *newt++ = 4; // symbol length
3055 *newt++ = newid; // symbol id
3056 *newt++ = powx; // power
3057 *newt++ = 1;
3058 *newt++ = 1; // coefficient +/-1
3059 *newt++ = 3*sign;
3060 *newt++ = 0; // trailing 0
3061
3062 substituted = true;
3063 }
3064 }
3065
3066 if (!substituted) {
3067#ifdef DEBUG_GREEDY
3068 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3069#endif
3070 return false;
3071 }
3072
3073 // add extra equation (Tnew = x*y)
3074 instr.push_back(newid); // eqn.nr
3075 instr.push_back(OPER_MUL); // operator
3076 instr.push_back(20); // total length
3077 instr.push_back(8); // LHS length
3078 instr.push_back(vartypex); // (extra)symbol
3079 instr.push_back(4); // symbol length
3080 instr.push_back(varnumx); // symbol id
3081 instr.push_back(1); // power 1
3082 instr.push_back(1);
3083 instr.push_back(1); // coefficient 1
3084 instr.push_back(3);
3085 instr.push_back(8); // RHS length
3086 instr.push_back(vartypey); // (extra)symbol
3087 instr.push_back(4); // symbol length
3088 instr.push_back(varnumy); // symbol id
3089 instr.push_back(1); // power 1
3090 instr.push_back(1);
3091 instr.push_back(1); // coefficient 1
3092 instr.push_back(3);
3093 instr.push_back(0); // trailing 0
3094 }
3095// #] type 1 :
3096// #[ type 2 : substitution of the form z=c*x (optim.type==2)
3097
3098 if (optim.type == 2) {
3099 int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3100 int varnum = ABS(optim.arg1) - 1;
3101
3102 WORD ncoeff = optim.coeff.back();
3103
3104 // scan through equations
3105 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3106 WORD *e = ebegin + optim.eqnidxs[i];
3107
3108 if (*(e+1) == OPER_ADD) {
3109 // scan through ADD-equation
3110 for (WORD *t=e+3; *t!=0; t+=*t) {
3111
3112 if (*t == ABS(*(t+*t-1))+1) continue;
3113
3114 if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1 &&
3115 BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3116 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3117 // substitute
3118
3119 int sign = SGN(*(t+*t-1));
3120
3121 WORD *tend = t;
3122 while (*tend!=0) tend+=*tend;
3123 WORD nmove = tend - t - *t;
3124 memmove(t, t+*t, nmove*sizeof(WORD));
3125 t += nmove;
3126
3127 *t++ = 8; // term length
3128 *t++ = EXTRASYMBOL; // (extra)symbol
3129 *t++ = 4; // symbol length
3130 *t++ = newid; // symbol id
3131 *t++ = 1; // power of 1
3132 *t++ = 1;
3133 *t++ = 1; // coefficient of +/-1
3134 *t++ = 3 * sign;
3135 *t++ = 0; // trailing 0
3136
3137 substituted = true;
3138 break;
3139 }
3140 }
3141 }
3142 else if (*(e+1) == OPER_MUL) {
3143
3144 bool coeff_match=false, var_match=false;
3145 int sign = 1;
3146
3147 // scan through MUL-equation
3148 for (WORD *t=e+3; *t!=0; t+=*t) {
3149 if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3150 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3151 coeff_match = true;
3152 sign *= SGN(*(t+*t-1));
3153 }
3154 else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1) {
3155 var_match = true;
3156 sign *= SGN(*(t+*t-1));
3157 }
3158 }
3159
3160 // substitute if found
3161 if (coeff_match && var_match) {
3162
3163 WORD *newt = e+3;
3164
3165 for (WORD *t=e+3; *t!=0;) {
3166
3167 int dt=*t;
3168
3169 if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3170 memmove(newt, t, dt*sizeof(WORD));
3171 newt += dt;
3172 }
3173
3174 t+=dt;
3175 }
3176
3177 *newt++ = 8; // term length
3178 *newt++ = EXTRASYMBOL; // extrasymbol
3179 *newt++ = 4; // symbol length
3180 *newt++ = newid; // symbol id
3181 *newt++ = 1; // power of 1
3182 *newt++ = 1;
3183 *newt++ = 1; // coefficient of +/-1
3184 *newt++ = 3 * sign;
3185 *newt++ = 0; // trailing 0
3186
3187 substituted = true;
3188 }
3189 }
3190 }
3191
3192 if (!substituted) {
3193#ifdef DEBUG_GREEDY
3194 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3195#endif
3196 return false;
3197 }
3198
3199 // add extra equation (Tnew = c*y)
3200 instr.push_back(newid); // eqn.nr
3201 instr.push_back(OPER_ADD); // operator
3202 instr.push_back(9+ABS(ncoeff)); // total length
3203 instr.push_back(5+ABS(ncoeff)); // term length
3204 instr.push_back(vartype); // (extra)symbol
3205 instr.push_back(4); // symbol length
3206 instr.push_back(varnum); // symbol id
3207 instr.push_back(1); // power of 1
3208 for (int i=0; i<ABS(ncoeff); i++) // coefficient
3209 instr.push_back(optim.coeff[i]);
3210 instr.push_back(0); // trailing 0
3211 }
3212// #] type 2 :
3213// #[ type 3 : substitution of the form z=x+c (optim.type==3)
3214 if (optim.type == 3) {
3215 int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3216 int varnum = ABS(optim.arg1) - 1;
3217
3218 WORD ncoeff = optim.coeff.back();
3219
3220 // scan through equation
3221 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3222 WORD *e = ebegin + optim.eqnidxs[i];
3223
3224 if (*(e+1) != OPER_ADD) continue;
3225
3226 int coeff_match=0, var_match=0;
3227
3228 for (WORD *t=e+3; *t!=0; t+=*t) {
3229 if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ABS(ncoeff)-1,
3230 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0)
3231 coeff_match = SGN(ncoeff) * SGN(*(t+*t-1));
3232 else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)
3233 var_match = SGN(*(t+7));
3234 }
3235
3236 // substitute if found (x+c and -x-c and matches)
3237 if (coeff_match * var_match == 1) {
3238
3239 WORD *newt = e+3;
3240
3241 for (WORD *t=e+3; *t!=0;) {
3242 int dt=*t;
3243 if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3244 memmove(newt, t, dt*sizeof(WORD));
3245 newt += dt;
3246 }
3247 t+=dt;
3248 }
3249
3250 *newt++ = 8; // term length
3251 *newt++ = EXTRASYMBOL; // extrasymbol
3252 *newt++ = 4; // symbol length
3253 *newt++ = newid; // symbol id
3254 *newt++ = 1; // power of 1
3255 *newt++ = 1;
3256 *newt++ = 1; // coefficient of +/-1
3257 *newt++ = 3*coeff_match;
3258 *newt++ = 0; // trailing zero
3259
3260 substituted = true;
3261 }
3262 }
3263
3264 if (!substituted) {
3265#ifdef DEBUG_GREEDY
3266 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3267#endif
3268 return false;
3269 }
3270
3271 // add extra equation (Tnew = x+c)
3272 instr.push_back(newid); // eqn.nr
3273 instr.push_back(OPER_ADD); // operator
3274 instr.push_back(13+ABS(ncoeff)); // total length
3275 instr.push_back(8); // x-term length
3276 instr.push_back(vartype); // (extra)symbol
3277 instr.push_back(4); // symbol length
3278 instr.push_back(varnum); // symbol id
3279 instr.push_back(1); // power of 1
3280 instr.push_back(1);
3281 instr.push_back(1); // coefficient of 1
3282 instr.push_back(3);
3283 instr.push_back(ABS(ncoeff)+1); // c-term length
3284 for (int i=0; i<ABS(ncoeff); i++) // coefficient
3285 instr.push_back(optim.coeff[i]);
3286 instr.push_back(0); // trailing zero
3287 }
3288// #] type 3 :
3289// #[ type 4,5 : substitution of the form z=x+y or z=x-y (optim.type=4 or 5)
3290 if (optim.type >= 4) {
3291
3292 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3293 int varnumx = ABS(optim.arg1) - 1;
3294 int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
3295 int varnumy = ABS(optim.arg2) - 1;
3296
3297 // scan through equations
3298 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3299 WORD *e = ebegin + optim.eqnidxs[i];
3300
3301 if (*(e+1) != OPER_ADD) continue;
3302
3303 const WORD *coeffx=NULL, *coeffy=NULL;
3304 WORD ncoeffx=0,ncoeffy=0;
3305
3306 // looks for terms
3307 for (WORD *t=e+3; *t!=0; t+=*t) {
3308 if (*t == ABS(*(t+*t-1))+1) continue; // constant
3309 if (*(t+1)==vartypex && *(t+3)==varnumx && *(t+4)==1) {
3310 coeffx = t+5;
3311 ncoeffx = *(t+*t-1);
3312 }
3313 if (*(t+1)==vartypey && *(t+3)==varnumy && *(t+4)==1) {
3314 coeffy = t+5;
3315 ncoeffy = *(t+*t-1);
3316 }
3317 }
3318
3319 // check signs (type=4: x+y and -x-y, type=5: x-y and -x+y) ??????
3320 // check signs (type=4: x+y, type=5: x-y) !!!!!!!!!!
3321 if (SGN(ncoeffx) * SGN(ncoeffy) * (optim.type==4 ? 1 : -1) == 1) {
3322 // check absolute value of coefficients
3323 if (BigLong((UWORD *)coeffx, ABS(ncoeffx)-1, (UWORD *)coeffy, ABS(ncoeffy)-1) == 0) {
3324 // substitute
3325 vector<WORD> coeff(coeffx, coeffx+ABS(ncoeffx));
3326
3327 WORD *newt = e+3;
3328/*
3329if ( optim.type == 5 ) {
3330 while ( *newt ) newt+=*newt;
3331 int i = (newt - e) - 3;
3332 MesPrint(" < %a",i,e+3);
3333 newt = e+3;
3334}
3335*/
3336 for (WORD *t=e+3; *t!=0;) {
3337 int dt=*t;
3338 if (*t == ABS(*(t+*t-1))+1 ||
3339 (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
3340 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
3341 memmove(newt, t, dt*sizeof(WORD));
3342 newt += dt;
3343 }
3344 t+=dt;
3345 }
3346
3347 *newt++ = 5 + ABS(ncoeffx); // term length
3348 *newt++ = EXTRASYMBOL; // extrasymbol
3349 *newt++ = 4; // symbol length
3350 *newt++ = newid; // symbol id
3351 *newt++ = 1; // power of 1
3352 for (int j=0; j<ABS(ncoeffx); j++)// coefficient
3353 *newt++ = coeff[j];
3354 *newt++ = 0; // trailing 0
3355 substituted = true;
3356/*
3357if ( optim.type == 5 ) {
3358 int i = (newt - e) - 4;
3359 MesPrint(" > %a",i,e+3);
3360}
3361*/
3362 }
3363 }
3364 }
3365
3366 if (!substituted) {
3367#ifdef DEBUG_GREEDY
3368 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3369#endif
3370 return false;
3371 }
3372/*
3373if ( optim.type == 5 )
3374MesPrint ("improve=%d, %c%d%c%c%d)", optim.improve,
3375 optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
3376 optim.type==1 ? '*' : optim.type==4 ? '+' : '-',
3377 optim.arg2>0?'x':'Z', ABS(optim.arg2)-1);
3378*/
3379 // add extra equation (Tnew = x+/-y)
3380 instr.push_back(newid); // eqn.nr
3381 instr.push_back(OPER_ADD); // operator
3382 instr.push_back(20); // total length
3383 instr.push_back(8); // term length
3384 instr.push_back(vartypex); // (extra)symbol
3385 instr.push_back(4); // symbol length
3386 instr.push_back(varnumx); // symbol id
3387 instr.push_back(1); // power of 1
3388 instr.push_back(1);
3389 instr.push_back(1); // coefficient of 1
3390 instr.push_back(3);
3391 instr.push_back(8); // term length
3392 instr.push_back(vartypey); // (extra)symbol
3393 instr.push_back(4); // symbol length
3394 instr.push_back(varnumy); // symbol id
3395 instr.push_back(1); // power of 1
3396 instr.push_back(1);
3397 instr.push_back(1); // coefficient of +/-1
3398 instr.push_back(3*(optim.type==4?1:-1));
3399 instr.push_back(0); // trailing 0
3400 }
3401// #] type 4,5 :
3402// #[ trivial : remove trivial equations of the form Zi = +/-Zj
3403 vector<int> renum(newid+1, 0);
3404 bool do_renum=false;
3405
3406 // vector may be moved when it is extended
3407 ebegin = &*instr.begin();
3408
3409 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3410 WORD *e = ebegin + optim.eqnidxs[i];
3411 WORD *t = e+3;
3412 if (*t==0) continue; // empty (removed) equation
3413 if (*(t+*t)!=0) continue; // more than 1 term
3414 if (*t!=8) continue; // term not of correct form
3415 if (*(t+4)!=1) continue; // power != 1
3416 if (*(t+5)!=1 || *(t+6)!=1) continue; // coeff != +/-1
3417
3418 // trivial term, so renumber this one
3419 renum[*e] = SGN(*(t+7)) * (*(t+3) + 1);
3420 do_renum = true;
3421
3422 // remove equation
3423 *t=0;
3424 }
3425// #] trivial :
3426// #[ renumbering :
3427
3428 // there are renumberings to be done, so loop through all equations
3429 if (do_renum) {
3430 WORD *eend = ebegin+instr.size();
3431
3432 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3433 for (WORD *t=e+3; *t!=0; t+=*t) {
3434 if (*t == ABS(*(t+*t-1))+1) continue;
3435 if (*(t+1)==EXTRASYMBOL && renum[*(t+3)]!=0) {
3436 *(t+*t-1) *= SGN(renum[*(t+3)]);
3437 *(t+3) = ABS(renum[*(t+3)]) - 1;
3438 }
3439 }
3440 }
3441 }
3442// #] renumbering :
3443
3444#ifdef DEBUG_GREEDY
3445 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=true", thetime_str().c_str(), optim.improve);
3446#endif
3447
3448 return true;
3449}
3450
3451/*
3452 #] do_optimization :
3453 #[ partial_factorize :
3454*/
3455
3479int partial_factorize (vector<WORD> &instr, int n, int improve) {
3480
3481#ifdef DEBUG_GREEDY
3482 MesPrint ("*** [%s, w=%w] CALL: partial_factorize (n=%d)", thetime_str().c_str(), n);
3483#endif
3484
3485 GETIDENTITY;
3486
3487 // get starting positions of instructions
3488 vector<int> instr_idx(n);
3489 WORD *ebegin = &*instr.begin();
3490 WORD *eend = ebegin+instr.size();
3491 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3492 instr_idx[*e] = e - ebegin;
3493 }
3494
3495 // get reference counts
3496/*
3497 * The next construction replaces the vector construction which is
3498 * rather costly for valgrind (and maybe also in normal running)
3499 */
3500 int nmax = 2*n;
3501 WORD *numpar = (WORD *)Malloc1(nmax*sizeof(WORD),"numpar");
3502 for ( int i = 0; i < nmax; i++ ) numpar[i] = 0;
3503// vector<WORD> numpar(n);
3504 for (WORD *e=ebegin; e!=eend; e+=*(e+2))
3505 for (WORD *t=e+3; *t!=0; t+=*t) {
3506 if (*t == ABS(*(t+*t-1))+1) continue;
3507 if (*(t+1) == EXTRASYMBOL) numpar[*(t+3)]++;
3508 }
3509
3510 // find factorizable expressions
3511 for (int i=0; i<n; i++) {
3512 WORD *e = &*instr.begin() + instr_idx[i];
3513 if (*(e+1) != OPER_ADD) continue;
3514
3515 // count symbol occurrences
3516 map<WORD,WORD> cnt; // 1-indexed, <0:EXTRASYMBOL, >0:SYMBOL
3517
3518 for (WORD *t=e+3; *t!=0; t+=*t) {
3519 if (*t==ABS(*(t+*t-1))+1) continue;
3520
3521 // count symbols in t
3522 if (*(t+4)==1)
3523 cnt[(*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1)]++;
3524
3525 // count symbols in extrasymbols of t
3526 if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3527 WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3528 if (*(t2+1) != OPER_MUL) continue;
3529 for (t2+=3; *t2!=0; t2+=*t2) {
3530 if (*t2 == ABS(*(t2+*t2-1))+1) continue;
3531 if (*(t2+4)==1)
3532 cnt[(*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1)]++;
3533 }
3534 }
3535 }
3536
3537 // find most-occurring symbol
3538 WORD x=0, best=0;
3539 for (map<WORD,WORD>::iterator it=cnt.begin(); it!=cnt.end(); it++)
3540 if (it->second > best) { x=it->first; best=it->second; }
3541
3542 // occurrence>=2 and occurrence>improve, so factorize
3543
3544 if (best>=2 && best>improve) {
3545 // initialize new equation (Zi from example above)
3546 vector<WORD> new_eqn;
3547 new_eqn.push_back(n);
3548 new_eqn.push_back(OPER_ADD);
3549 new_eqn.push_back(0); // length
3550
3551 WORD dt;
3552 WORD *newt=e+3;
3553 for (WORD *t=e+3; *t!=0; t+=dt) {
3554 dt = *t;
3555 bool keep=true;
3556
3557 if (*t!=ABS(*(t+*t-1))+1) {
3558
3559 // factorized symbol is in t itself
3560 if (*(t+4)==1) {
3561 WORD y = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1);
3562 if (y==x) {
3563 new_eqn.push_back(*t-4);
3564 new_eqn.insert(new_eqn.end(), t+5, t+dt);
3565 keep=false;
3566 }
3567 }
3568
3569 // look in extrasymbol of t with ref.count=1
3570 if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3571 WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3572 if (*(t2+1) == OPER_MUL) {
3573 bool has_x=false;
3574 for (t2+=3; *t2!=0; t2+=*t2) {
3575 if (*t2 == ABS(*(t2+*t2-1))+1) continue;
3576 WORD y = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1);
3577 // extrasymbol has factorized symbol
3578 if (y==x && *(t2+4)==1) {
3579 has_x=true;
3580 // copy remaining part
3581 WORD *tend=t2+*t2;
3582 WORD sign = SGN(*(tend-1));
3583 while (*tend!=0) tend+=*tend;
3584 int dt2 = tend - (t2+*t2);
3585 memmove(t2, t2+*t2, (dt2+1)*sizeof(WORD));
3586 t2 += dt2;
3587 *(t2-1) *= sign;
3588 break;
3589 }
3590 }
3591 if (has_x) {
3592 // extrasymbol has x, so add it to new equation
3593 keep=false;
3594 int thisidx=new_eqn.size();
3595 new_eqn.insert(new_eqn.end(), t, t+dt);
3596 t2 = &*instr.begin() + instr_idx[*(t+3)] + 3;
3597 // if becomes trivial, substitute the term
3598 if (*(t2+*t2)==0) {
3599 // it's a number
3600 if (*t2 == ABS(*(t2+*t2-1))+1) {
3601 if (ABS(new_eqn[new_eqn.size()-1])==3 && new_eqn[new_eqn.size()-2]==1 && new_eqn[new_eqn.size()-3]==1) {
3602 // original equation has coefficient of +/-1, so replace it
3603 WORD sign = SGN(new_eqn.back());
3604 new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3605 new_eqn.insert(new_eqn.end(), t2, t2+*t2);
3606 new_eqn.back() *= sign;
3607 *t2 = 0;
3608 }
3609 else {
3610 // two non-trivial coefficients, so multiply them
3611 // note: untested code (found no way to trigger it)
3612 UWORD *tmp = NumberMalloc("partial_factorize");
3613 WORD ntmp=0;
3614 MulRat(BHEAD (UWORD *)t2+*t2-ABS(*(t2+*t2-1)), *(t2+*t2-1),
3615 (UWORD *)&*(new_eqn.end()-ABS(new_eqn.back())), new_eqn.back(),
3616 tmp, &ntmp);
3617 new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3618 new_eqn.push_back(ABS(ntmp)+1);
3619 new_eqn.insert(new_eqn.end(), tmp, tmp+ABS(ntmp));
3620 NumberFree(tmp,"partial_factorize");
3621 *t2 = 0;
3622 }
3623 }
3624 else if (*(t2+4)==1) {
3625 // it's a variable
3626 new_eqn.back() *= SGN(*(t2+*t2-1));
3627 new_eqn[thisidx+1] = *(t2+1);
3628 new_eqn[thisidx+2] = *(t2+2);
3629 new_eqn[thisidx+3] = *(t2+3);
3630 new_eqn[thisidx+4] = *(t2+4);
3631 *t2 = 0;
3632 }
3633 }
3634 }
3635 }
3636 }
3637 }
3638
3639 // no x, so copy it
3640 if (keep) {
3641 memmove(newt, t, dt*sizeof(WORD));
3642 newt += dt;
3643 }
3644 }
3645
3646 // finalize new equation
3647 new_eqn.push_back(0);
3648 new_eqn[2] = new_eqn.size();
3649
3650 bool empty = newt == e+3;
3651 if ( n+1 >= nmax ) {
3652 int i, newnmax = nmax*2;
3653 WORD *newnumpar = (WORD *)Malloc1(newnmax*sizeof(WORD),"newnumpar");
3654 for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3655 for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3656 M_free(numpar,"numpar");
3657 numpar = newnumpar;
3658 nmax = newnmax;
3659 }
3660// numpar.push_back(0);
3661 n++;
3662
3663 // if original is not empty, add new equation (Zj) to it
3664 // otherwise replace it later
3665 if (!empty) {
3666 *newt++ = 8;
3667 *newt++ = EXTRASYMBOL;
3668 *newt++ = 4;
3669 *newt++ = n;
3670 *newt++ = 1;
3671 *newt++ = 1;
3672 *newt++ = 1;
3673 *newt++ = 3;
3674 *newt++ = 0;
3675 }
3676
3677 // add new equation to instructions
3678 instr_idx.push_back(instr.size());
3679 instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3680
3681 // generate another new equation (Zj=x*Zi)
3682 new_eqn.clear();
3683 new_eqn.push_back(n);
3684 new_eqn.push_back(OPER_MUL);
3685 new_eqn.push_back(20);
3686 new_eqn.push_back(8);
3687
3688 // add factorized symbol
3689 if (x>0) {
3690 new_eqn.push_back(SYMBOL);
3691 new_eqn.push_back(4);
3692 new_eqn.push_back(x-1);
3693 new_eqn.push_back(1);
3694 }
3695 else {
3696 new_eqn.push_back(EXTRASYMBOL);
3697 new_eqn.push_back(4);
3698 new_eqn.push_back(-x-1);
3699 new_eqn.push_back(1);
3700 }
3701 new_eqn.push_back(1);
3702 new_eqn.push_back(1);
3703 new_eqn.push_back(3);
3704 new_eqn.push_back(8);
3705 // add new equation (Zi)
3706 new_eqn.push_back(EXTRASYMBOL);
3707 new_eqn.push_back(4);
3708 new_eqn.push_back(n-1);
3709 new_eqn.push_back(1);
3710 new_eqn.push_back(1);
3711 new_eqn.push_back(1);
3712 new_eqn.push_back(3);
3713 new_eqn.push_back(0);
3714
3715 if (!empty) {
3716 // add new equation (Zj) to instructions
3717 instr_idx.push_back(instr.size());
3718 instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3719 if ( n+1 >= nmax ) {
3720 int i, newnmax = nmax*2;
3721 WORD *newnumpar = (WORD *)Malloc1(newnmax*sizeof(WORD),"newnumpar");
3722 for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3723 for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3724 M_free(numpar,"numpar");
3725 numpar = newnumpar;
3726 nmax = newnmax;
3727 }
3728// numpar.push_back(0);
3729 n++;
3730 }
3731 else {
3732 // replace e with Zj
3733 e = &*instr.begin() + instr_idx[i];
3734 e[1] = OPER_MUL;
3735 memcpy(e+3, &new_eqn[3], (new_eqn.size()-3)*sizeof(WORD));
3736 }
3737
3738 // decrease i, so this expression is factorized again if possible
3739 i--;
3740 }
3741 }
3742
3743#ifdef DEBUG_GREEDY
3744 MesPrint ("*** [%s, w=%w] DONE: partial_factorize (n=%d)", thetime_str().c_str(), n);
3745#endif
3746 M_free(numpar,"numpar");
3747 return n;
3748}
3749
3750/*
3751 #] partial_factorize :
3752 #[ optimize_greedy :
3753*/
3754
3773vector<WORD> optimize_greedy (vector<WORD> instr, LONG time_limit) {
3774
3775#ifdef DEBUG
3776 int old_num_oper = count_operators(instr);
3777 MesPrint ("*** [%s, w=%w] CALL: optimize_greedy(numoper=%d)",
3778 thetime_str().c_str(), old_num_oper);
3779#endif
3780
3781 LONG start_time = TimeWallClock(1);
3782
3783 WORD *ebegin = &*instr.begin();
3784 WORD *eend = ebegin+instr.size();
3785
3786 // store final equation, since it must be the last equation later
3787 int final_eqn_idx = 0;
3788 int next_eqn = 0;
3789
3790 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3791 next_eqn = *e + 1;
3792 final_eqn_idx = e-ebegin;
3793 }
3794 // optimize instructions
3795 while (TimeWallClock(1)-start_time < time_limit) {
3796 int old_next_eqn = next_eqn;
3797
3798 // find optimizations
3799 vector<optimization> optim = find_optimizations(instr);
3800
3801 // add_eqnidxs contains modified equations, which might have to be updated later again
3802 vector<int> add_eqnidxs;
3803
3804 // number of optimizations to do
3805 int num_do_optims = max(AO.Optimize.greedyminnum, (int)optim.size()*AO.Optimize.greedymaxperc/100);
3806
3807 // if best improvement is one, do all optimizations
3808 int best_improve=0;
3809 for (int i=0; i<(int)optim.size(); i++)
3810 best_improve = max(best_improve, optim[i].improve);
3811 if (best_improve <= 1)
3812 num_do_optims = MAXPOSITIVE;
3813 // do a number of optimizations
3814 while (optim.size() > 0 && num_do_optims-- > 0) {
3815
3816 // find best optimization
3817 int best=0;
3818 best_improve=0;
3819 for (int i=0; i<(int)optim.size(); i++)
3820 if (optim[i].improve > best_improve) {
3821 best=i;
3822 best_improve=optim[i].improve;
3823 }
3824
3825 // add extra equations
3826 for (int i=0; i<(int)add_eqnidxs.size(); i++)
3827 optim[best].eqnidxs.push_back(add_eqnidxs[i]);
3828
3829 // do optimization, update next_eqn if successful
3830 int next_idx = instr.size();
3831 if (do_optimization(optim[best], instr, next_eqn)) {
3832 next_eqn++;
3833 add_eqnidxs.push_back(next_idx);
3834 }
3835
3836 optim.erase(optim.begin()+best);
3837 }
3838
3839 // partially factorize with improve >= best_improve
3840 next_eqn = partial_factorize(instr, next_eqn, best_improve);
3841
3842 // check whether nothing has changed
3843 if (next_eqn == old_next_eqn) break;
3844 }
3845
3846 // add final equation to the back (must be by definition)
3847 instr.push_back(next_eqn);
3848 instr.insert(instr.end(), instr.begin()+final_eqn_idx+1, instr.begin()+final_eqn_idx+instr[final_eqn_idx+2]);
3849
3850 // removed original final equation
3851 instr[final_eqn_idx+3] = 0;
3852
3853 // remove extra zeroes
3854 WORD *t = &instr[0];
3855
3856 ebegin = &*instr.begin();
3857 eend = ebegin+instr.size();
3858 int de=0;
3859
3860 for (WORD *e=ebegin; e!=eend; e+=de) {
3861 de = *(e+2);
3862 int n=3;
3863 while (*(e+n) != 0) n+=*(e+n);
3864 n++;
3865 memmove (t, e, n*sizeof(WORD));
3866 *(t+2) = n;
3867 t += n;
3868 }
3869
3870 instr.resize(t - &instr[0]);
3871
3872#ifdef DEBUG
3873 MesPrint ("*** [%s, w=%w] DONE: optimize_greedy(numoper=%d) : numoper=%d",
3874 thetime_str().c_str(), old_num_oper, count_operators(instr));
3875#endif
3876
3877 return instr;
3878}
3879
3880/*
3881 #] optimize_greedy :
3882 #[ recycle_variables :
3883*/
3884
3913vector<WORD> recycle_variables (const vector<WORD> &all_instr) {
3914
3915#ifdef DEBUG_MORE
3916 MesPrint ("*** [%s, w=%w] CALL: recycle_variables", thetime_str().c_str());
3917#endif
3918
3919 // get starting positions of instructions
3920 vector<const WORD *> instr;
3921 const WORD *tbegin = &*all_instr.begin();
3922 const WORD *tend = tbegin+all_instr.size();
3923 for (const WORD *t=tbegin; t!=tend; t+=*(t+2))
3924 instr.push_back(t);
3925 int n = instr.size();
3926
3927 // determine with expressions are connected, how many intermediate
3928 // are needed (assuming it's a expression tree instead of a DAG) and
3929 // sort the leaves such that you need a minimal number of variables
3930 vector<int> vars_needed(n);
3931 vector<bool> vis(n,false);
3932 vector<vector<int> > conn(n);
3933
3934 stack<int> s;
3935 s.push(n);
3936
3937 while (!s.empty()) {
3938 int i=s.top(); s.pop();
3939 if (i>0) {
3940 i--;
3941 if (vis[i]) continue;
3942 vis[i]=true;
3943 s.push(-(i+1));
3944
3945 // find all connections
3946 for (const WORD *t=instr[i]+3; *t!=0; t+=*t)
3947 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
3948 int k = *(t+3);
3949 conn[i].push_back(k);
3950 s.push(k+1);
3951 }
3952 }
3953 else {
3954 i=-i-1;
3955
3956 // sort the childs w.r.t. needed variables
3957 vector<pair<int,int> > need;
3958 for (int j=0; j<(int)conn[i].size(); j++)
3959 need.push_back(make_pair(vars_needed[conn[i][j]], conn[i][j]));
3960
3961 // keep the comma expression in proper order
3962 if (*(instr[i]+1) != OPER_COMMA)
3963 sort(need.rbegin(), need.rend());
3964
3965 vars_needed[i] = 1;
3966 for (int j=0; j<(int)need.size(); j++) {
3967 vars_needed[i] = max(vars_needed[i], need[j].first+j);
3968 conn[i][j] = need[j].second;
3969 }
3970 }
3971 }
3972
3973 // order the instructions in depth-first order and determine the first
3974 // and last occurrences of variables
3975 vector<int> order, first(n,0), last(n,0);
3976 vis = vector<bool>(n,false);
3977 s.push(n);
3978
3979 while (!s.empty()) {
3980
3981 int i=s.top(); s.pop();
3982
3983 if (i>0) {
3984 i--;
3985 if (vis[i]) continue;
3986 vis[i]=true;
3987 s.push(-(i+1));
3988 for (int j=(int)conn[i].size()-1; j>=0; j--)
3989 s.push(conn[i][j]+1);
3990 }
3991 else {
3992 i=-i-1;
3993 first[i] = last[i] = order.size();
3994 order.push_back(i);
3995 for (int j=0; j<(int)conn[i].size(); j++) {
3996 int k = conn[i][j];
3997 last[k] = max(last[k], first[i]);
3998 }
3999 }
4000 }
4001
4002 // find the renumbering to recycled variables, where at any time the
4003 // lowest-indexed variable that can be used is chosen
4004 int numvar=0;
4005 set<int> var;
4006 vector<int> renum(n);
4007
4008 for (int i=0; i<(int)order.size(); i++) {
4009 for (int j=0; j<(int)conn[order[i]].size(); j++) {
4010 int k = conn[order[i]][j];
4011 if (last[k] == i) var.insert(renum[k]);
4012 }
4013
4014 if (var.empty()) var.insert(numvar++);
4015 renum[order[i]] = *var.begin(); var.erase(var.begin());
4016 }
4017
4018 // put the number of variables used in a preprocessor variable
4019
4020 // generate new instructions with the renumbering
4021 vector<WORD> newinstr;
4022
4023 for (int i=0; i<(int)order.size(); i++) {
4024 int x = order[i];
4025 int j = newinstr.size();
4026 newinstr.insert(newinstr.end(), instr[x], instr[x]+*(instr[x]+2));
4027
4028 newinstr[j] = renum[newinstr[j]];
4029
4030 for (WORD *t=&newinstr[j+3]; *t!=0; t+=*t)
4031 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL)
4032 *(t+3) = renum[*(t+3)];
4033 }
4034
4035#ifdef DEBUG_MORE
4036 MesPrint ("*** [%s, w=%w] DONE: recycle_variables", thetime_str().c_str());
4037#endif
4038
4039 return newinstr;
4040}
4041
4042/*
4043 #] recycle_variables :
4044 #[ optimize_expression_given_Horner :
4045*/
4046
4061
4062#ifdef DEBUG
4063 MesPrint ("*** [%s, w=%w] CALL: optimize_expression_given_Horner", thetime_str().c_str());
4064#endif
4065
4066 GETIDENTITY;
4067
4068 // initialize timer
4069 LONG start_time = TimeWallClock(1);
4070 LONG time_limit = 100 * AO.Optimize.greedytimelimit / (AO.Optimize.horner == O_MCTS ? AO.Optimize.mctsnumkeep : 1);
4071 if (time_limit == 0) time_limit=MAXPOSITIVE;
4072
4073 // pick a Horner scheme from the list
4074 LOCK(optimize_lock);
4075 vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4076 optimize_best_Horner_schemes.pop_back();
4077 UNLOCK(optimize_lock);
4078
4079// if ( ( AO.Optimize.debugflags&2 ) == 2 ) {
4080// MesPrint ("Scheme: %a",Horner_scheme.size(),&(Horner_scheme[0]));
4081// }
4082
4083 // apply Horner scheme
4084 vector<WORD> tree = Horner_tree(optimize_expr, Horner_scheme);
4085
4086 // generate instructions, eventually with CSE
4087 vector<WORD> instr;
4088
4089 if (AO.Optimize.method == O_CSE || AO.Optimize.method == O_CSEGREEDY)
4090 instr = generate_instructions(tree, true);
4091 else
4092 instr = generate_instructions(tree, false);
4094 if (AO.Optimize.method == O_CSEGREEDY || AO.Optimize.method == O_GREEDY) {
4095 instr = merge_operators(instr, false);
4096 instr = optimize_greedy(instr, time_limit-(TimeWallClock(1)-start_time));
4097 instr = merge_operators(instr, true);
4098 instr = optimize_greedy(instr, time_limit-(TimeWallClock(1)-start_time));
4099 }
4100 instr = merge_operators(instr, true);
4101
4102 // recycle the temporary variables
4103 instr = recycle_variables(instr);
4104
4105 // determine the quality of the code and possibly update the best code
4106 int num_oper = count_operators(instr);
4107
4108 LOCK(optimize_lock);
4109 if (num_oper < optimize_best_num_oper) {
4110 optimize_num_vars = Horner_scheme.size();
4111 optimize_best_num_oper = num_oper;
4112 optimize_best_instr = instr;
4113 optimize_best_vars = vector<WORD>(AN.poly_vars, AN.poly_vars+AN.poly_num_vars);
4114 }
4115 UNLOCK(optimize_lock);
4116
4117 // clean poly_vars, that are allocated by Horner_tree
4118 AN.poly_num_vars = 0;
4119 M_free(AN.poly_vars,"poly_vars");
4120
4121#ifdef DEBUG
4122 MesPrint ("*** [%s, w=%w] DONE: optimize_expression_given_Horner", thetime_str().c_str());
4123#endif
4124}
4125
4126/*
4127 #] optimize_expression_given_Horner :
4128 #[ PF_optimize_expression_given_Horner :
4129*/
4130#ifdef WITHMPI
4131
4132// Initialization.
4133void PF_optimize_expression_given_Horner_master_init () {
4134 // Nothing to do for now.
4135}
4136
4137// Wait for an idle slave and return the process number.
4138int PF_optimize_expression_given_Horner_master_next() {
4139
4140 // Find an idle slave.
4141 int next;
4142 PF_LongSingleReceive(PF_ANY_SOURCE, PF_OPT_HORNER_MSGTAG, &next, NULL);
4143 return next;
4144
4145}
4146
4147// The main function on the master.
4148void PF_optimize_expression_given_Horner_master () {
4149
4150#ifdef DEBUG
4151 MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4152#endif
4153
4154 // pick a Horner scheme from the list
4155 vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4156 optimize_best_Horner_schemes.pop_back();
4157
4158 // Find an idle slave.
4159 int next = PF_optimize_expression_given_Horner_master_next();
4160
4161 // Send a new task to the slave.
4163 int len = Horner_scheme.size();
4164 PF_LongSinglePack(&len, 1, PF_INT);
4165 PF_LongSinglePack(&Horner_scheme[0], len, PF_WORD);
4166 PF_LongSingleSend(next, PF_OPT_HORNER_MSGTAG);
4167
4168#ifdef DEBUG
4169 MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4170#endif
4171
4172}
4173
4174// Wait for all the slaves to finish their tasks.
4175void PF_optimize_expression_given_Horner_master_wait () {
4176
4177#ifdef DEBUG
4178 MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4179#endif
4180
4181 // Wait for all the slaves.
4182 for (int i = 1; i < PF.numtasks; i++) {
4183 int next = PF_optimize_expression_given_Horner_master_next();
4184 // Send a null task.
4186 int len = 0;
4187 PF_LongSinglePack(&len, 1, PF_INT);
4188 PF_LongSingleSend(next, PF_OPT_HORNER_MSGTAG);
4189 }
4190
4191 // Combine the result from all the slaves.
4192 optimize_best_num_oper = INT_MAX;
4193 for (int i = 1; i < PF.numtasks; i++) {
4194 PF_LongSingleReceive(PF_ANY_SOURCE, PF_OPT_COLLECT_MSGTAG, NULL, NULL);
4195
4196 int len;
4197
4198 // The first integer is the number of operations.
4199 PF_LongSingleUnpack(&len, 1, PF_INT);
4200
4201 if (len >= optimize_best_num_oper) continue;
4202
4203 // Update the best result.
4204 optimize_best_num_oper = len;
4205 PF_LongSingleUnpack(&len, 1, PF_INT);
4206 optimize_best_instr.resize(len);
4207 PF_LongSingleUnpack(&optimize_best_instr[0], len, PF_WORD);
4208 PF_LongSingleUnpack(&len, 1, PF_INT);
4209 optimize_best_vars.resize(len);
4210 PF_LongSingleUnpack(&optimize_best_vars[0], len, PF_WORD);
4211
4212 optimize_num_vars = optimize_best_vars.size(); // TODO
4213 }
4214
4215#ifdef DEBUG
4216 MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4217#endif
4218
4219}
4220
4221// The main function on the slaves.
4222void PF_optimize_expression_given_Horner_slave () {
4223
4224#ifdef DEBUG
4225 MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4226#endif
4227
4228 optimize_best_Horner_schemes.clear();
4229 optimize_best_num_oper = INT_MAX;
4230
4231 int dummy = 0;
4232 int len;
4233
4234 for (;;) {
4235 // Ask the master for the next task.
4237 PF_LongSinglePack(&dummy, 1, PF_INT);
4238 PF_LongSingleSend(MASTER, PF_OPT_HORNER_MSGTAG);
4239
4240 // Get a task from the master.
4241 PF_LongSingleReceive(MASTER, PF_OPT_HORNER_MSGTAG, NULL, NULL);
4242
4243 // Length of the task.
4244 PF_LongSingleUnpack(&len, 1, PF_INT);
4245
4246 // No task remains.
4247 if (len == 0) break;
4248
4249 // Perform the given task.
4250 optimize_best_Horner_schemes.push_back(vector<WORD>());
4251 vector<WORD> &Horner_scheme = optimize_best_Horner_schemes.back();
4252 Horner_scheme.resize(len);
4253 PF_LongSingleUnpack(&Horner_scheme[0], len, PF_WORD);
4255 }
4256
4257 // Send the result to the master.
4259 PF_LongSinglePack(&optimize_best_num_oper, 1, PF_INT);
4260 if (optimize_best_num_oper != INT_MAX) {
4261 len = optimize_best_instr.size();
4262 PF_LongSinglePack(&len, 1, PF_INT);
4263 PF_LongSinglePack(&optimize_best_instr[0], len, PF_WORD);
4264 len = optimize_best_vars.size();
4265 PF_LongSinglePack(&len, 1, PF_INT);
4266 PF_LongSinglePack(&optimize_best_vars[0], len, PF_WORD);
4267 }
4268 PF_LongSingleSend(MASTER, PF_OPT_COLLECT_MSGTAG);
4269
4270#ifdef DEBUG
4271 MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4272#endif
4273
4274}
4275
4276#endif
4277/*
4278 #] PF_optimize_expression_given_Horner :
4279 #[ generate_output :
4280*/
4281
4291void generate_output (const vector<WORD> &instr, int exprnr, int extraoffset, const vector<vector<WORD> > &brackets) {
4292
4293#ifdef DEBUG
4294 MesPrint ("*** [%s, w=%w] CALL: generate_output", thetime_str().c_str());
4295#endif
4296
4297 GETIDENTITY;
4298 vector<WORD> output;
4299
4300 // one-indexed instead of zero-indexed
4301 extraoffset++;
4302 int num = 0;
4303 int maxsize = (int)instr.size();
4304 for (int i=0; i<maxsize; i+=instr[i+2]) num++;
4305 int *tstart = (int *)Malloc1(num*sizeof(int),"nplaces");
4306 num = 0;
4307 for (int i=0; i<maxsize; i+=instr[i+2]) tstart[num++] = i;
4308 for (int j=0; j<num; j++) {
4309 int i = tstart[j];
4310
4311 // copy arguments
4312 WCOPY(AT.WorkPointer, &instr[i+3], (instr[i+2]-3));
4313
4314 // update maximal variable number
4315 AO.OptimizeResult.maxvar = MaX(AO.OptimizeResult.maxvar, instr[i]+extraoffset);
4316
4317 // renumber symbols and extrasymbols to correct values
4318 for (WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4319 if (*t == ABS(*(t+*t-1))+1) continue;
4320 if (*(t+1)==SYMBOL) *(t+3) = optimize_best_vars[*(t+3)];
4321 if (*(t+1)==EXTRASYMBOL) {
4322 *(t+1) = SYMBOL;
4323 *(t+3) = MAXVARIABLES-*(t+3)-extraoffset;
4324 }
4325 }
4326
4327 // reformat multiplication instructions, since their current
4328 // format is "expr.nr OPER_MUL length arguments", which differs
4329 // from Form's format for a product of symbols
4330 if (instr[i+1]==OPER_MUL) {
4331
4332 WORD *now=AT.WorkPointer+1;
4333 int dt;
4334 bool coeff=false;
4335 int coeff_sign=1;
4336
4337 for (WORD *t=AT.WorkPointer; *t!=0; t+=dt) {
4338 dt = *t;
4339 if (*t == ABS(*(t+*t-1))+1) {
4340 // copy coefficient
4341 memmove(AT.WorkPointer+instr[i+2], t, *t*sizeof(WORD));
4342 coeff = true;
4343 }
4344 else {
4345 // move symbol
4346 int n = *(t+2);
4347 memmove(now, t+1, n*sizeof(WORD));
4348 now += n;
4349 coeff_sign *= SGN(*(t+dt-1));
4350 }
4351 }
4352
4353 if (coeff) {
4354 // add existing coefficient
4355 int n = *(AT.WorkPointer + instr[i+2]) - 1;
4356 memmove(now, AT.WorkPointer+instr[i+2]+1, n*sizeof(WORD));
4357 now += n;
4358 }
4359 else {
4360 // add coefficient of one
4361 *now++=1;
4362 *now++=1;
4363 *now++=3;
4364 }
4365
4366 *(now-1) *= coeff_sign;
4367
4368 *AT.WorkPointer = now - AT.WorkPointer;
4369 *now++ = 0;
4370 }
4371
4372 // in the case of simultaneous optimization of expressions, add the
4373 // brackets to the final expression
4374 if (instr[i+1]==OPER_COMMA) {
4375 WORD *start = AT.WorkPointer + instr[i+2];
4376 WORD *now = start;
4377 int b=0;
4378 for (const WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4379 if ( ( brackets[b].size() != 0 ) && ( brackets[b][0] == 0 ) ) break;
4380 *now++ = *t + brackets[b].size();
4381 if (brackets[b].size() != 0) {
4382 memcpy(now, &brackets[b][0], brackets[b].size()*sizeof(WORD));
4383 now += brackets[b].size();
4384 }
4385 memcpy(now, t+1, (*t-1)*sizeof(WORD));
4386 now += *t-1;
4387 b++;
4388 }
4389 *now++ = 0;
4390 memmove(AT.WorkPointer, start, (now-start)*sizeof(WORD));
4391 }
4392
4393 // add the number of the extra symbol; if it is the last one,
4394 // replace the extra symbol number with the expression number
4395 if (i+instr[i+2]<(int)instr.size())
4396 output.push_back(instr[i]+extraoffset);
4397 else {
4398 output.push_back(-(exprnr+1));
4399 }
4400
4401 // add code for this symbol
4402 int n=0;
4403 while (*(AT.WorkPointer+n)!=0)
4404 n += *(AT.WorkPointer+n);
4405 n++;
4406 output.insert(output.end(), AT.WorkPointer, AT.WorkPointer+n);
4407 }
4408
4409 // trailing zero
4410 output.push_back(0);
4411
4412 // clear buffer
4413 if (AO.OptimizeResult.code != NULL)
4414 M_free(AO.OptimizeResult.code, "optimize output");
4415
4416 M_free(tstart,"nplaces");
4417
4418 // copy buffer
4419 AO.OptimizeResult.codesize = output.size();
4420 AO.OptimizeResult.code = (WORD *)Malloc1(output.size()*sizeof(WORD), "optimize output");
4421 memcpy(AO.OptimizeResult.code, &output[0], output.size()*sizeof(WORD));
4422
4423#ifdef DEBUG
4424 MesPrint ("*** [%s, w=%w] DONE: generate_output", thetime_str().c_str());
4425#endif
4426}
4427
4428/*
4429 #] generate_output :
4430 #[ generate_expression :
4431*/
4432
4440WORD generate_expression (WORD exprnr) {
4441
4442#ifdef DEBUG
4443 MesPrint ("*** [%s, w=%w] CALL: generate_expression", thetime_str().c_str());
4444#endif
4445
4446 GETIDENTITY;
4447
4448 WORD *oldWorkPointer = AT.WorkPointer;
4449
4450 CBUF *C = cbuf+AC.cbufnum;
4451 WORD *term = AT.WorkPointer, oldcurexpr = AR.CurExpr;
4452 POSITION position;
4453 EXPRESSIONS e = Expressions+exprnr;
4454 SetScratch(AR.infile,&(e->onfile));
4455
4456 if ( GetTerm(BHEAD term) <= 0 ) {
4457 MesPrint("Expression %d has problems in scratchfile",exprnr);
4458 Terminate(-1);
4459 }
4460 SeekScratch(AR.outfile,&position);
4461 e->onfile = position;
4462 if ( PutOut(BHEAD term,&position,AR.outfile,0) < 0 ) {
4463 MesPrint("Expression %d has problems in output scratchfile",exprnr);
4464 Terminate(-1);
4465 }
4466
4467 AR.CurExpr = exprnr;
4468 NewSort(BHEAD0);
4469
4470 // scan for the original expression (marked by *t<0) and give the
4471 // terms to Generator
4472 WORD *t = AO.OptimizeResult.code;
4473 {
4474 WORD old = AR.Cnumlhs; AR.Cnumlhs = 0;
4475 // We can use the remaining part of the WorkSpace for every term that
4476 // goes through Generator. We have WorkSpace problems here if we use
4477 // the (modified) AT.WorkPointer every time in the loop.
4478 WORD* currentWorkPointer = AT.WorkPointer;
4479 while (*t!=0) {
4480 bool is_expr = *t < 0;
4481 t++;
4482 while (*t!=0) {
4483 if (is_expr) {
4484 memcpy(currentWorkPointer, t, *t*sizeof(WORD));
4485 Generator(BHEAD currentWorkPointer, C->numlhs);
4486 }
4487 t+=*t;
4488 }
4489 t++;
4490 }
4491 AR.Cnumlhs = old;
4492 }
4493
4494 // final sorting
4495 if (EndSort(BHEAD NULL,0) < 0) {
4497 Terminate(-1);
4498 }
4499
4500 AT.WorkPointer = oldWorkPointer;
4501 AR.CurExpr = oldcurexpr;
4502
4503#ifdef DEBUG
4504 MesPrint ("*** [%s, w=%w] DONE: generate_expression", thetime_str().c_str());
4505#endif
4506
4507 return 0;
4508}
4509
4510/*
4511 #] generate_expression :
4512 #[ optimize_print_code :
4513*/
4514
4524void optimize_print_code (int print_expr) {
4525
4526#ifdef DEBUG
4527 MesPrint ("*** [%s, w=%w] CALL: optimize_print_code", thetime_str().c_str());
4528#endif
4529 if ( ( AO.Optimize.debugflags & 1 ) != 0 ) {
4530/*
4531 * The next code is for debugging purposes. We may want the statements
4532 * in reverse order to substitute them all back.
4533 * Jan used a Mathematica program to do this. Here we make that
4534 * Format Ox,debugflag=1;
4535 * Creates reverse order during printing.
4536 * All we have to do is put id in front of the statements. This is done
4537 * in PrintExtraSymbol.
4538 */
4539 WORD *t = AO.OptimizeResult.code;
4540 WORD num = 0; // First we count the number of objects.
4541 while (*t!=0) {
4542 num++;
4543 t++; while (*t!=0) t+=*t; t++;
4544 }
4545 WORD **tstart = (WORD **)Malloc1(num*sizeof(WORD *),"print objects");
4546 t = AO.OptimizeResult.code; num = 0; // Now we get the addresses
4547 while (*t!=0) {
4548 tstart[num++] = t;
4549 t++; while (*t!=0) t+=*t; t++;
4550 }
4551 // Flip the addresses
4552 int halfnum = num/2;
4553 for (int i=0; i<halfnum; i++) { swap(tstart[i],tstart[num-1-i]); }
4554 for ( int i = 0; i < num; i++ ) {
4555 t = tstart[i];
4556 if (*t > 0)
4557 PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4558 else if (print_expr)
4559 PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4560 }
4561 CBUF *C = cbuf + AM.sbufnum;
4562 if (C->numrhs >= AO.OptimizeResult.minvar)
4563 PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4564 }
4565 else {
4566 // print extra symbols from ConvertToPoly in optimization
4567 CBUF *C = cbuf + AM.sbufnum;
4568 if (C->numrhs >= AO.OptimizeResult.minvar)
4569 PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4570
4571 WORD *t = AO.OptimizeResult.code;
4572 while (*t!=0) {
4573 if (*t > 0) {
4574 PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4575 }
4576 else if (print_expr)
4577 PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4578 t++;
4579 while (*t!=0) t+=*t;
4580 t++;
4581 }
4582 }
4583
4584#ifdef DEBUG
4585 MesPrint ("*** [%s, w=%w] DONE: optimize_print_code", thetime_str().c_str());
4586#endif
4587}
4588
4589/*
4590 #] optimize_print_code :
4591 #[ Optimize :
4592*/
4593
4637int Optimize (WORD exprnr, int do_print) {
4638
4639#if defined(WITHMPI) && (defined(DEBUG) || defined(DEBUG_MORE) || defined(DEBUG_MCTS) || defined(DEBUG_GREEDY))
4640 // set AS.printflag negative temporary.
4641 struct save_printflag {
4642 save_printflag() {
4643 oldprintflag = AS.printflag;
4644 AS.printflag = -1;
4645 }
4646 ~save_printflag() {
4647 AS.printflag = oldprintflag;
4648 }
4649 int oldprintflag;
4650 } save_printflag_;
4651#endif
4652
4653#ifdef DEBUG
4654 MesPrint ("*** [%s, w=%w] CALL: Optimize", thetime_str().c_str());
4655 MesPrint ("*** %"); PrintRunningTime();
4656#endif
4657
4658#ifdef WITHPTHREADS
4659 optimize_lock = dummylock;
4660#endif
4661
4662 AO.OptimizeResult.minvar = (cbuf + AM.sbufnum)->numrhs + 1;
4663
4664 if (get_expression(exprnr) < 0) return -1;
4665 vector<vector<WORD> > brackets = get_brackets();
4666
4667#ifdef DEBUG
4668#ifdef WITHMPI
4669 if (PF.me == MASTER)
4670#endif
4671 MesPrint ("*** runtime after preparing the expression: %"); PrintRunningTime();
4672#endif
4673
4674 if (optimize_expr[0]==0 ||
4675 (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==ABS(optimize_expr[optimize_expr[0]-1])+1) ||
4676 (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==8 &&
4677 optimize_expr[5]==1 && optimize_expr[6]==1 && ABS(optimize_expr[7])==3)) {
4678 if (AO.OptimizeResult.code != NULL)
4679 M_free(AO.OptimizeResult.code, "optimize output");
4680
4681 // zero terms or one trivial term (number or +/-variable), so no
4682 // optimization, so copy expression; special case because without
4683 // operators the optimization crashes
4684 AO.OptimizeResult.code = (WORD *)Malloc1((optimize_expr[0]+3)*sizeof(WORD), "optimize output");
4685 AO.OptimizeResult.code[0] = -(exprnr+1);
4686 memcpy(AO.OptimizeResult.code+1, optimize_expr, (optimize_expr[0]+1)*sizeof(WORD));
4687 AO.OptimizeResult.code[optimize_expr[0]+2] = 0;
4688 }
4689 else {
4690 // find Horner scheme(s)
4691 optimize_best_Horner_schemes.clear();
4692 if (AO.Optimize.horner == O_OCCURRENCE) {
4693 if (AO.Optimize.hornerdirection != O_BACKWARD)
4694 optimize_best_Horner_schemes.push_back(occurrence_order(optimize_expr, false));
4695 if (AO.Optimize.hornerdirection != O_FORWARD)
4696 optimize_best_Horner_schemes.push_back(occurrence_order(optimize_expr, true));
4697 }
4698 else {
4699 if (AO.Optimize.horner == O_SIMULATED_ANNEALING) {
4700 optimize_best_Horner_schemes.push_back(simulated_annealing());
4701 }
4702 else {
4703 mcts_best_schemes.clear();
4704 if ( AO.inscheme ) {
4705 optimize_best_Horner_schemes.push_back(vector<WORD>(AO.schemenum));
4706 for ( int i=0; i < AO.schemenum; i++ )
4707 optimize_best_Horner_schemes[0][i] = AO.inscheme[i];
4708 }
4709 else {
4710 for ( int i = 0; i < AO.Optimize.mctsnumrepeat; i++ )
4712 // generate results
4713 for (set<pair<int,vector<WORD> > >::iterator i=mcts_best_schemes.begin(); i!=mcts_best_schemes.end(); i++) {
4714 optimize_best_Horner_schemes.push_back(i->second);
4715#ifdef DEBUG_MCTS
4716 MesPrint ("{%a} -> %d",i->second.size(), &i->second[0], i->first);
4717#endif
4718 }
4719 }
4720 // clear the tree by making a new empty one.
4721 mcts_root = tree_node();
4722 }
4723 }
4724#ifdef DEBUG
4725#ifdef WITHMPI
4726 if (PF.me == MASTER)
4727#endif
4728 MesPrint ("*** runtime after Horner: %"); PrintRunningTime();
4729#endif
4730
4731#ifdef WITHMPI
4732 if (PF.me == MASTER ) {
4733 PF_optimize_expression_given_Horner_master_init();
4734#endif
4735
4736 // find best Horner scheme and results
4737 optimize_best_num_oper = INT_MAX;
4738
4739 int imax = (int)optimize_best_Horner_schemes.size();
4740
4741 for (int i=0; i<imax; i++) {
4742#if defined(WITHPTHREADS)
4743 if (AM.totalnumberofthreads > 1)
4744 optimize_expression_given_Horner_threaded();
4745 else
4746#elif defined(WITHMPI)
4747 if (PF.numtasks > 1)
4748 PF_optimize_expression_given_Horner_master();
4749 else
4750#endif
4752 }
4753
4754#ifdef WITHMPI
4755 PF_optimize_expression_given_Horner_master_wait();
4756 }
4757 else {
4758 if (PF.numtasks > 1)
4759 PF_optimize_expression_given_Horner_slave();
4760 }
4761#endif
4762
4763#ifdef WITHPTHREADS
4764 MasterWaitAll();
4765#endif
4766 // format results, then print it (for "Print") or modify
4767 // expression (for "#Optimize")
4768#ifdef WITHMPI
4769 if (PF.me == MASTER)
4770#endif
4771 generate_output(optimize_best_instr, exprnr, cbuf[AM.sbufnum].numrhs, brackets);
4772#ifdef WITHMPI
4773 else {
4774 // non-null dummy code for slaves
4775 if (AO.OptimizeResult.code == NULL) {
4776 AO.OptimizeResult.code = (WORD *)Malloc1(sizeof(WORD), "optimize output");
4777 }
4778 }
4779#endif
4780 }
4781
4782#ifdef WITHMPI
4783 if (PF.me == MASTER) {
4785 PF_Pack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4786 PF_Pack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4787 }
4788 PF_Broadcast();
4789 if (PF.me != MASTER) {
4790 PF_Unpack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4791 PF_Unpack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4792 }
4793#endif
4794
4795 // set preprocessor variables
4796 char str[100];
4797 snprintf (str,100,"%d",AO.OptimizeResult.minvar);
4798 PutPreVar((UBYTE *)"optimminvar_",(UBYTE *)str,0,1);
4799 snprintf (str,100,"%d",AO.OptimizeResult.maxvar);
4800 PutPreVar((UBYTE *)"optimmaxvar_",(UBYTE *)str,0,1);
4801
4802 if (do_print) {
4803#ifdef WITHMPI
4804 if (PF.me == MASTER)
4805#endif
4807 ClearOptimize();
4808 }
4809 else {
4810#ifdef WITHMPI
4811 if (PF.me == MASTER)
4812#endif
4813 generate_expression(exprnr);
4814 }
4815
4816#ifdef WITHMPI
4817 if (PF.me == MASTER) {
4818#endif
4819
4820 if ( AO.Optimize.printstats > 0 ) {
4821 char str[20];
4822 MesPrint("");
4823 count_operators(optimize_expr,true);
4824 int numop = count_operators(optimize_best_instr,true);
4825 snprintf(str,20,"%d",numop);
4826 PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)str,0,1);
4827 }
4828 else {
4829 char str[20];
4830 int numop = count_operators(optimize_best_instr,false);
4831 snprintf(str,20,"%d",numop);
4832 PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)str,0,1);
4833 }
4834
4835 if ( ( AO.Optimize.schemeflags&1 ) == 1 ) {
4836 GETIDENTITY
4837 UBYTE *OutScr, *Out, *old1 = AO.OutputLine, *old2 = AO.OutFill;
4838 int i, sym;
4839 AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4840 FiniLine();
4841 OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4842 TokenToLine((UBYTE *)" Scheme selected: ");
4843 for ( i = 0; i < optimize_num_vars; i++ ) {
4844 Out = OutScr;
4845 sym = optimize_best_vars[i];
4846 if ( i > 0 ) TokenToLine((UBYTE *)",");
4847 if ( sym < NumSymbols ) {
4848 StrCopy(FindSymbol(sym),OutScr);
4849/* StrCopy(VARNAME(symbols,sym),OutScr); */
4850 }
4851 else {
4852 Out = StrCopy((UBYTE *)AC.extrasym,Out);
4853 if ( AC.extrasymbols == 0 ) {
4854 Out = NumCopy((MAXVARIABLES-sym),Out);
4855 Out = StrCopy((UBYTE *)"_",Out);
4856 }
4857 else if ( AC.extrasymbols == 1 ) {
4858 Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4859 }
4860 }
4861 TokenToLine(OutScr);
4862 }
4863 TokenToLine((UBYTE *)";");
4864 FiniLine();
4865 AO.OutFill = old2;
4866 AO.OutputLine = old1;
4867 }
4868
4869 {
4870 GETIDENTITY
4871 UBYTE *OutScr, *Out, *outstring = 0;
4872 int i, sym;
4873 AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4874 OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4875 for ( i = 0; i < optimize_num_vars; i++ ) {
4876 Out = OutScr;
4877 sym = optimize_best_vars[i];
4878 if ( sym < NumSymbols ) {
4879 StrCopy(FindSymbol(sym),OutScr);
4880/* StrCopy(VARNAME(symbols,sym),OutScr); */
4881 }
4882 else {
4883 Out = StrCopy((UBYTE *)AC.extrasym,Out);
4884 if ( AC.extrasymbols == 0 ) {
4885 Out = NumCopy((MAXVARIABLES-sym),Out);
4886 Out = StrCopy((UBYTE *)"_",Out);
4887 }
4888 else if ( AC.extrasymbols == 1 ) {
4889 Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4890 }
4891 }
4892 outstring = AddToString(outstring,OutScr,1);
4893 }
4894 if ( outstring == 0 ) {
4895 PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)"",0,1);
4896 }
4897 else {
4898 PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)outstring,0,1);
4899 M_free(outstring,"AddToString");
4900 }
4901 }
4902
4903#ifdef WITHMPI
4904 }
4905
4906 // synchronize optimvalue_ and optimscheme_
4907 if ( PF.me == MASTER ) {
4908 UBYTE *value;
4909 int bytes;
4910
4912
4913 value = GetPreVar((UBYTE *)"optimvalue_", WITHERROR);
4914 bytes = strlen((char *)value);
4915 PF_LongMultiPack(&bytes, 1, PF_INT);
4916 PF_LongMultiPack(value, bytes, PF_BYTE);
4917
4918 value = GetPreVar((UBYTE *)"optimscheme_", WITHERROR);
4919 bytes = strlen((char *)value);
4920 PF_LongMultiPack(&bytes, 1, PF_INT);
4921 PF_LongMultiPack(value, bytes, PF_BYTE);
4922 }
4924 if ( PF.me != MASTER ) {
4925 static vector<UBYTE> prevarbuf;
4926 UBYTE *value;
4927 int bytes;
4928
4929 PF_LongMultiUnpack(&bytes, 1, PF_INT);
4930 prevarbuf.reserve(bytes + 1);
4931 value = &prevarbuf[0];
4932 PF_LongMultiUnpack(value, bytes, PF_BYTE);
4933 value[bytes] = '\0'; // null terminator
4934 PutPreVar((UBYTE *)"optimvalue_", value, NULL, 1);
4935
4936 PF_LongMultiUnpack(&bytes, 1, PF_INT);
4937 prevarbuf.reserve(bytes + 1);
4938 value = &prevarbuf[0];
4939 PF_LongMultiUnpack(value, bytes, PF_BYTE);
4940 value[bytes] = '\0'; // null terminator
4941 PutPreVar((UBYTE *)"optimscheme_", value, NULL, 1);
4942 }
4943#endif
4944
4945 // cleanup
4946 M_free(optimize_expr,"LoadOptim");
4947
4948#ifdef DEBUG
4949 MesPrint ("*** [%s, w=%w] DONE: Optimize", thetime_str().c_str());
4950#endif
4951
4952 return 0;
4953}
4954
4955/*
4956 #] Optimize :
4957 #[ ClearOptimize :
4958*/
4959
4975{
4976 char str[20];
4977 WORD numexpr, *w;
4978 int error = 0;
4979 if ( AO.OptimizeResult.code != NULL ) {
4980 M_free(AO.OptimizeResult.code, "optimize output");
4981 AO.OptimizeResult.code = NULL;
4982 AO.OptimizeResult.codesize = 0;
4983 PutPreVar((UBYTE *)"optimminvar_",(UBYTE *)("0"),0,1);
4984 PutPreVar((UBYTE *)"optimmaxvar_",(UBYTE *)("0"),0,1);
4985 PruneExtraSymbols(AO.OptimizeResult.minvar-1);
4986 cbuf[AM.sbufnum].numrhs = AO.OptimizeResult.minvar-1;
4987 AO.OptimizeResult.minvar = AO.OptimizeResult.maxvar = 0;
4988 }
4989 if ( AO.OptimizeResult.nameofexpr != NULL ) {
4990/*
4991 We have to pick up the expression by its name. Numbers may change.
4992 Note that this requires that when we overwrite an expression, we
4993 check that it is not an optimized expression. See execute.c and
4994 comexpr.c
4995*/
4996 if ( GetName(AC.exprnames,AO.OptimizeResult.nameofexpr,&numexpr,NOAUTO) != CEXPRESSION ) {
4997 MesPrint("@Internal error while clearing optimized expression %s ",AO.OptimizeResult.nameofexpr);
4998 Terminate(-1);
4999 }
5000 M_free(AO.OptimizeResult.nameofexpr, "optimize expression name");
5001 AO.OptimizeResult.nameofexpr = NULL;
5002 w = &(Expressions[numexpr].status);
5003 *w = SetExprCases(DROP,1,*w);
5004 if ( *w < 0 ) error = 1;
5005 }
5006 snprintf (str,20,"%d",cbuf[AM.sbufnum].numrhs);
5007 PutPreVar(AM.oldnumextrasymbols,(UBYTE *)str,0,1);
5008 PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)("0"),0,1);
5009 PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)("0"),0,1);
5010 return(error);
5011}
5012
5013/*
5014 #] ClearOptimize :
5015*/
WORD PutOut(PHEAD WORD *, POSITION *, FILEHANDLE *, WORD)
Definition sort.c:1171
LONG EndSort(PHEAD WORD *, int)
Definition sort.c:454
int Generator(PHEAD WORD *, WORD)
Definition proces.c:3249
LONG TimeWallClock(WORD)
Definition tools.c:3413
void LowerSortLevel(void)
Definition sort.c:4661
int StoreTerm(PHEAD WORD *)
Definition sort.c:4244
int NewSort(PHEAD0)
Definition sort.c:359
int PutPreVar(UBYTE *, UBYTE *, UBYTE *, int)
Definition pre.c:724
int PF_LongSingleReceive(int src, int tag, int *psrc, int *ptag)
Definition mpi.c:1693
int PF_LongSingleSend(int to, int tag)
Definition mpi.c:1650
int PF_PrepareLongSinglePack(void)
Definition mpi.c:1561
int PF_Unpack(void *buffer, size_t count, MPI_Datatype type)
Definition mpi.c:783
int PF_Receive(int src, int tag, int *psrc, int *ptag)
Definition mpi.c:959
int PF_Send(int to, int tag)
Definition mpi.c:933
int PF_PreparePack(void)
Definition mpi.c:736
int PF_LongSingleUnpack(void *buffer, size_t count, MPI_Datatype type)
Definition mpi.c:1613
int PF_Pack(const void *buffer, size_t count, MPI_Datatype type)
Definition mpi.c:754
int PF_PrepareLongMultiPack(void)
Definition mpi.c:1752
int PF_Broadcast(void)
Definition mpi.c:994
int PF_LongMultiBroadcast(void)
Definition mpi.c:1916
int PF_LongSinglePack(const void *buffer, size_t count, MPI_Datatype type)
Definition mpi.c:1579
vector< WORD > recycle_variables(const vector< WORD > &all_instr)
Definition optimize.cc:3913
int ClearOptimize()
Definition optimize.cc:4974
void optimize_print_code(int print_expr)
Definition optimize.cc:4524
int count_operators(const WORD *expr, bool print=false)
Definition optimize.cc:422
void generate_output(const vector< WORD > &instr, int exprnr, int extraoffset, const vector< vector< WORD > > &brackets)
Definition optimize.cc:4291
vector< vector< WORD > > get_brackets()
Definition optimize.cc:302
vector< WORD > Horner_tree(const WORD *expr, const vector< WORD > &order)
Definition optimize.cc:873
vector< WORD > occurrence_order(const WORD *expr, bool rev)
Definition optimize.cc:519
vector< optimization > find_optimizations(const vector< WORD > &instr)
Definition optimize.cc:2697
void find_Horner_MCTS()
Definition optimize.cc:2254
vector< WORD > merge_operators(const vector< WORD > &all_instr, bool move_coeff)
Definition optimize.cc:2402
void optimize_expression_given_Horner()
Definition optimize.cc:4060
vector< WORD > optimize_greedy(vector< WORD > instr, LONG time_limit)
Definition optimize.cc:3773
WORD generate_expression(WORD exprnr)
Definition optimize.cc:4440
int count_operators_cse(const vector< WORD > &tree)
Definition optimize.cc:1341
LONG get_expression(int exprnr)
Definition optimize.cc:225
bool do_optimization(const optimization optim, vector< WORD > &instr, int newid)
Definition optimize.cc:2930
int partial_factorize(vector< WORD > &instr, int n, int improve)
Definition optimize.cc:3479
void build_Horner_tree(const WORD **terms, int numterms, int var, int maxvar, int pos, vector< WORD > *res)
Definition optimize.cc:652
void fixarg(UWORD *t, WORD &n)
Definition optimize.cc:614
bool term_compare(const WORD *a, const WORD *b)
Definition optimize.cc:852
void my_random_shuffle(PHEAD RandomAccessIterator fr, RandomAccessIterator to)
Definition optimize.cc:201
int Optimize(WORD exprnr, int do_print)
Definition optimize.cc:4637
WORD getpower(const WORD *term, int var, int pos)
Definition optimize.cc:600
vector< WORD > generate_instructions(const vector< WORD > &tree, bool do_CSE)
Definition optimize.cc:1132
void PF_BroadcastBuffer(WORD **buffer, LONG *length)
Definition parallel.c:2125