42#ifndef CONFIG_H_INCLUDED
43#define CONFIG_H_INCLUDED
60#ifndef HAVE_UNORDERED_MAP
61#define HAVE_UNORDERED_MAP
63#ifndef HAVE_UNORDERED_SET
64#define HAVE_UNORDERED_SET
68#ifdef HAVE_UNORDERED_MAP
69 #include <unordered_map>
70 using std::unordered_map;
71#elif !defined(HAVE_TR1_UNORDERED_MAP) && defined(HAVE_BOOST_UNORDERED_MAP_HPP)
72 #include <boost/unordered_map.hpp>
73 using boost::unordered_map;
75 #include <tr1/unordered_map>
76 using std::tr1::unordered_map;
78#ifdef HAVE_UNORDERED_SET
79 #include <unordered_set>
80 using std::unordered_set;
81#elif !defined(HAVE_TR1_UNORDERED_SET) && defined(HAVE_BOOST_UNORDERED_SET_HPP)
82 #include <boost/unordered_set.hpp>
83 using boost::unordered_set;
85 #include <tr1/unordered_set>
86 using std::tr1::unordered_set;
89#if defined(HAVE_BUILTIN_POPCOUNT)
90 static inline int popcount(
unsigned int x) {
91 return __builtin_popcount(x);
93#elif defined(HAVE_POPCNT)
95 static inline int popcount(
unsigned int x) {
99 static inline int popcount(
unsigned int x) {
102 if ((x & 1) == 1) count++;
120const WORD OPER_ADD = -1;
121const WORD OPER_MUL = -2;
122const WORD OPER_COMMA = -3;
127 vector<tree_node> childs;
134 sum_results(0), num_visits(0), var(_var), finished(
false) {}
137 childs(other.childs),
138 sum_results(other.sum_results),
139 num_visits(other.num_visits),
141 finished(other.finished) {}
144 if (
this != &other) {
145 childs = other.childs;
146 sum_results = other.sum_results;
147 num_visits = other.num_visits;
149 finished = other.finished;
157vector<vector<WORD> > optimize_best_Horner_schemes;
158int optimize_num_vars;
159int optimize_best_num_oper;
160vector<WORD> optimize_best_instr;
161vector<WORD> optimize_best_vars;
164bool mcts_factorized, mcts_separated;
165vector<WORD> mcts_vars;
168set<pair<int,vector<WORD> > > mcts_best_schemes;
171pthread_mutex_t optimize_lock;
179void print_instr (
const vector<WORD> &instr, WORD num)
181 const WORD *tbegin = &*instr.begin();
182 const WORD *tend = tbegin+instr.size();
183 for (
const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
184 MesPrint(
"<%d> %a",num,t[2],t);
200template <
class RandomAccessIterator>
202 for (
int i=to-fr-1; i>0; --i)
203 swap (fr[i],fr[wranf(BHEAD0) % (i+1)]);
211static WORD comlist[3] = {TYPETOPOLYNOMIAL,3,DOALL};
233 SetScratch(AR.infile,&(e->onfile));
236 WORD *term = AT.WorkPointer;
242 while (GetTerm(BHEAD term) > 0) {
243 AT.WorkPointer = term + *term;
245 WORD *t2 = term + *term;
246 if (ConvertToPoly(BHEAD t1,t2,comlist,1) < 0)
return -1;
249 AT.WorkPointer = term + *term;
254 LONG len =
EndSort(BHEAD (WORD *)((
void *)(&optimize_expr)),2);
256 AT.WorkPointer = term;
268LONG PF_get_expression (
int exprnr) {
270 if (PF.me == MASTER) {
273 if (PF.numtasks > 1) {
280#define get_expression PF_get_expression
305 bool has_brackets =
false;
306 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
307 WORD *tend=t+*t; tend-=ABS(*(tend-1));
308 for (WORD *t1=t+1; t1<tend; t1+=*(t1+1))
314 vector<vector<WORD> > brackets;
318 for (WORD *t=optimize_expr; *t!=0; t+=*t)
320 WORD *newexpr = (WORD *)Malloc1(exprlen*
sizeof(WORD),
"optimize newexpr");
325 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
329 vector<WORD> bracket;
330 for (; *t1!=HAAKJE; t1+=*(t1+1))
331 bracket.insert(bracket.end(), t1, t1+*(t1+1));
333 if (brackets.size()==0 || bracket!=brackets.back()) {
335 brackets.push_back(bracket);
339 WORD left = t + *t - t1;
340 bool more_symbols = (left != ABS(*(t+*t-1)));
343 newexpr[i++] = 1 + left + (more_symbols ? 2 : 4);
344 newexpr[i++] = SYMBOL;
345 newexpr[i++] = (more_symbols ? *(t1+1) + 2 : 4);
346 newexpr[i++] = SEPARATESYMBOL;
347 newexpr[i++] = sep_power;
355 newexpr[i++] = *(t1++);
365 if ( sep_power == 1 )
368 vector<WORD> bracket;
369 bracket.push_back(0);
370 bracket.push_back(0);
371 bracket.push_back(0);
372 bracket.push_back(0);
374 brackets.push_back(bracket);
376 newexpr[i++] = SYMBOL;
378 newexpr[i++] = SEPARATESYMBOL;
379 newexpr[i++] = sep_power;
384 for (t=optimize_expr; *t!=0; t+=*t) {}
385 if ( t-optimize_expr+1 < i ) {
386 M_free(optimize_expr,
"$-sort space");
387 optimize_expr = (WORD *)Malloc1(i*
sizeof(WORD),
"$-sort space");
393 memcpy(optimize_expr, newexpr, i*
sizeof(WORD));
394 M_free(newexpr,
"optimize newexpr");
397 if (brackets[0].size()>0 && brackets[0][2]==FACTORSYMBOL) {
398 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
399 if (*t == ABS(*(t+*t-1))+1)
continue;
401 for (
int i=3; i<t[2]; i+=2)
402 if (t[i]==SEPARATESYMBOL) t[i]=FACTORSYMBOL;
404 return vector<vector<WORD> >();
425 while (*(expr+n)!=0) n+=*(expr+n);
427 int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
428 WORD maxpowfac=1, maxpowsep=1;
430 for (
const WORD *t=expr; *t!=0; t+=*t) {
431 if (t!=expr) cntadd++;
432 if (*t==ABS(*(t+*t-1))+1)
continue;
437 for (
int i=3; i<t[2]; i+=2) {
438 if (t[i]==FACTORSYMBOL) {
439 maxpowfac = max(maxpowfac, t[i+1]);
442 if (t[i]==SEPARATESYMBOL) {
443 maxpowsep = max(maxpowsep, t[i+1]);
448 sumpow += (int)floor(log(t[i+1])/log(2.0)) + popcount(t[i+1]) - 1;
450 if (t[i+1]==2) cntmul++;
454 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntsym++;
456 if (cntsym > 0) cntmul+=cntsym-1;
459 cntadd -= maxpowfac-1;
460 cntmul += maxpowfac-1;
462 cntadd -= maxpowsep-1;
465 MesPrint (
"*** STATS: original %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
467 return sumpow+cntmul+cntadd;
478 int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
480 const WORD *ebegin = &*instr.begin();
481 const WORD *eend = ebegin+instr.size();
483 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
484 for (
const WORD *t=e+3; *t!=0; t+=*t) {
486 if (*(e+1)==OPER_ADD) cntadd++;
487 if (*(e+1)==OPER_MUL) cntmul++;
489 if (*t == ABS(*(t+*t-1))+1)
continue;
490 if (*(t+1)==SYMBOL || *(t+1)==EXTRASYMBOL) {
491 if (*(t+4)==2) cntmul++;
494 sumpow += (int)floor(log(*(t+4))/log(2.0)) + popcount(*(t+4)) - 1;
497 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntmul++;
502 MesPrint (
"*** STATS: optimized %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
504 return sumpow+cntmul+cntadd;
523 for (
const WORD *t=expr; *t!=0; t+=*t) {
524 if (*t == ABS(*(t+*t-1))+1)
continue;
526 for (
int i=3; i<t[2]; i+=2)
530 bool is_fac=
false, is_sep=
false;
531 if (cnt.count(FACTORSYMBOL)) {
533 cnt.erase(FACTORSYMBOL);
535 if (cnt.count(SEPARATESYMBOL)) {
537 cnt.erase(SEPARATESYMBOL);
541 vector<pair<int,WORD> > cnt_order;
542 for (map<WORD,int>::iterator i=cnt.begin(); i!=cnt.end(); i++)
543 cnt_order.push_back(make_pair(i->second, i->first));
544 sort(cnt_order.rbegin(), cnt_order.rend());
548 for (
int i=0; i<(int)cnt_order.size(); i++)
549 order.push_back(cnt_order[i].second);
551 if (rev) reverse(order.begin(),order.end());
554 if (is_fac) order.insert(order.begin(), FACTORSYMBOL);
555 if (is_sep) order.insert(order.begin(), SEPARATESYMBOL);
600WORD
getpower (
const WORD *term,
int var,
int pos) {
601 if (*term == ABS(*(term+*term-1))+1)
return 0;
602 if (2*pos+2 >= term[2])
return 0;
603 if (term[2*pos+3] != var)
return 0;
604 return term[2*pos+4];
615 int an=ABS(n), sn=SGN(n);
616 while (*(t+an-1)==0) an--;
620void GcdLong_fix_args (PHEAD UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc) {
623 GcdLong(BHEAD a,na,b,nb,c,nc);
626void DivLong_fix_args(UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc, UWORD *d, WORD *nd) {
629 DivLong(a,na,b,nb,c,nc,d,nd);
652void build_Horner_tree (
const WORD **terms,
int numterms,
int var,
int maxvar,
int pos, vector<WORD> *res) {
660 for (
int fr=0; fr<numterms; fr++) {
663 const WORD *t = terms[fr];
666 if (*t != ABS(*(t+*t-1))+1)
667 for (
int i=3+2*pos; i<t[2]; i+=2) {
668 res->push_back(SYMBOL);
670 res->push_back(t[i]);
671 res->push_back(t[i+1]);
672 if (!empty) res->push_back(OPER_MUL);
678 res->push_back(SNUMBER);
686 res->push_back(SNUMBER);
687 WORD n = ABS(*(t+*t-1));
689 for (
int i=0; i<n; i++)
690 res->push_back(*(t+*t-n+i));
691 res->push_back(OPER_MUL);
693 if (fr>0) res->push_back(OPER_ADD);
700 res->push_back(SNUMBER);
705 res->push_back(OPER_MUL);
711 WORD nnum = 0, nden = 0, ntmp = 0, ndum = 0;
712 UWORD *num = NumberMalloc(
"build_Horner_tree");
713 UWORD *den = NumberMalloc(
"build_Horner_tree");
714 UWORD *tmp = NumberMalloc(
"build_Horner_tree");
715 UWORD *dum = NumberMalloc(
"build_Horner_tree");
718 int prev_coeff_idx = -1;
720 for (
int fr=0; fr<numterms;) {
723 WORD pow =
getpower(terms[fr], var, pos);
727 while (to<numterms &&
getpower(terms[to], var, pos) == pow) to++;
732 if (AN.poly_vars[var] != FACTORSYMBOL && AN.poly_vars[var] != SEPARATESYMBOL) {
734 WORD n1 = res->at(res->size()-2) / 2;
735 WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
737 WORD *t2 = fr==0 ? t1 : &res->at(prev_coeff_idx);
738 WORD n2 = fr==0 ? n1 : *(t2+*(t2+1)-1) / 2;
741 GcdLong_fix_args(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,num,&nnum);
742 GcdLong_fix_args(BHEAD (UWORD *)t1+ABS(n1),ABS(n1),(UWORD *)t2+ABS(n2),ABS(n2),den,&nden);
745 for (
int i=0; i<2; i++) {
746 if (i==1 && fr==0)
break;
748 WORD *t = i==0 ? t1 : t2;
749 WORD n = i==0 ? n1 : n2;
751 DivLong_fix_args((UWORD *)t, n, num, nnum, tmp, &ntmp, dum, &ndum);
752 for (
int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
753 for (
int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
755 if (SGN(ntmp) != SGN(n)) n=-n;
757 DivLong_fix_args((UWORD *)t, n, den, nden, tmp, &ntmp, dum, &ndum);
758 for (
int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
759 for (
int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
761 *t++ = SGN(n) * (2*ABS(n)+1);
765 if (fr>0) res->push_back(OPER_ADD);
768 WORD nextpow = (to==numterms ? 0 :
getpower(terms[to], var, pos));
770 if (pow-nextpow > 0) {
771 res->push_back(SYMBOL);
774 res->push_back(pow-nextpow);
775 res->push_back(OPER_MUL);
779 res->push_back(SNUMBER);
780 WORD n = MaX(ABS(nnum),nden);
781 res->push_back(n*2+3);
782 for (
int i=0; i<ABS(nnum); i++) res->push_back(num[i]);
783 for (
int i=0; i<n-ABS(nnum); i++) res->push_back(0);
784 for (
int i=0; i<nden; i++) res->push_back(den[i]);
785 for (
int i=0; i<n-ABS(nden); i++) res->push_back(0);
786 res->push_back(SGN(nnum)*(2*n+1));
787 res->push_back(OPER_MUL);
789 prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
791 else if (AN.poly_vars[var]==FACTORSYMBOL) {
796 WORD n1 = res->at(res->size()-2) / 2;
797 WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
798 WORD *t2 = &res->at(prev_coeff_idx);
799 WORD n2 = *(t2+*(t2+1)-1) / 2;
802 MulRat(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,tmp,&ntmp);
806 for (
int i=0; i<ABS(n2); i++)
807 t2[i] = t2[n2+i] = i==0 ? 1 : 0;
811 for (
int i=0; i<2*ABS(n1)+2; i++)
815 res->back() = 2*ABS(ntmp)+3;
816 res->insert(res->end(), tmp, tmp+2*ABS(ntmp));
817 res->push_back(SGN(ntmp)*(2*ABS(ntmp)+1));
818 res->push_back(OPER_MUL);
821 prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
825 res->push_back(OPER_MUL);
829 res->push_back(OPER_COMMA);
837 NumberFree(dum,
"build_Horner_tree");
838 NumberFree(tmp,
"build_Horner_tree");
839 NumberFree(den,
"build_Horner_tree");
840 NumberFree(num,
"build_Horner_tree");
853 if (*a == ABS(*(a+*a-1))+1)
return true;
854 if (*b == ABS(*(b+*b-1))+1)
return false;
855 if (a[1]!=SYMBOL)
return true;
856 if (b[1]!=SYMBOL)
return false;
857 for (
int i=3; i<a[2] && i<b[2]; i+=2) {
858 if (a[i] !=b[i] )
return a[i] >b[i];
859 if (a[i+1]!=b[i+1])
return a[i+1]<b[i+1];
873vector<WORD>
Horner_tree (
const WORD *expr,
const vector<WORD> &order) {
875 MesPrint (
"*** [%s, w=%w] CALL: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
881 map<WORD,WORD> renum;
882 AN.poly_num_vars = order.size();
883 AN.poly_vars = (WORD *)Malloc1(AN.poly_num_vars*
sizeof(WORD),
"AN.poly_vars");
884 for (
int i=0; i<AN.poly_num_vars; i++) {
885 AN.poly_vars[i] = order[i];
891 WORD* dynamicAlloc = 0;
894 for (
const WORD *t=expr; *t!=0; t+=*t) {
900 if ( AT.WorkPointer + sumsize + 1 > AT.WorkTop ) {
901 dynamicAlloc = (WORD*)Malloc1(
sizeof(*dynamicAlloc)*(sumsize+1),
"Horner_tree alloc");
902 sorted = dynamicAlloc;
905 sorted = AT.WorkPointer;
908 for (
const WORD *t=expr; *t!=0; t+=*t) {
909 memcpy(sorted, t, *t*
sizeof(WORD));
911 if (*t != ABS(*(t+*t-1))+1) {
915 for (
int i=3; i<sorted[2]; i+=2) {
916 if (!renum.count(sorted[i])) {
917 renum[sorted[i]] = AN.poly_num_vars;
919 WORD *new_poly_vars = (WORD *)Malloc1((AN.poly_num_vars+1)*
sizeof(WORD),
"AN.poly_vars");
920 memcpy(new_poly_vars, AN.poly_vars, AN.poly_num_vars*
sizeof(WORD));
921 M_free(AN.poly_vars,
"poly_vars");
922 AN.poly_vars = new_poly_vars;
923 AN.poly_vars[AN.poly_num_vars] = sorted[i];
926 sorted[i] = renum[sorted[i]];
929 for (
int i=0; i<sorted[2]/2; i++)
930 for (
int j=3; j+2<sorted[2]; j+=2)
931 if (sorted[j] > sorted[j+2]) {
932 swap(sorted[j] , sorted[j+2]);
933 swap(sorted[j+1], sorted[j+3]);
941 if ( dynamicAlloc ) {
942 sorted = dynamicAlloc;
945 sorted = AT.WorkPointer;
949 vector<const WORD *> terms;
950 for (
const WORD *t=sorted; *t!=0; t+=*t)
960 for (
int i=0; i<(int)res.size();) {
961 if (res[i]==OPER_ADD || res[i]==OPER_MUL || res[i]==OPER_COMMA)
963 else if (res[i]==SYMBOL) {
964 memmove(&res[j], &res[i], res[i+1]*
sizeof(WORD));
968 else if (res[i]==SNUMBER) {
969 int n = (res[i+1]-2)/2;
971 while (res[i+1+n-dn]==0 && res[i+1+2*n-dn]==0) dn++;
973 res[j++] = 2*(n-dn) + 3;
974 memmove(&res[j], &res[i+2], (n-dn)*
sizeof(WORD));
976 memmove(&res[j], &res[i+n+2], (n-dn)*
sizeof(WORD));
978 res[j++] = SGN(res[i+2*n+2])*(2*(n-dn)+1);
984 if ( dynamicAlloc ) {
985 M_free(dynamicAlloc,
"Horner_tree alloc");
989 MesPrint (
"*** [%s, w=%w] DONE: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
1001void print_tree (
const vector<WORD> &
tree) {
1005 for (
int i=0; i<(int)
tree.size();) {
1006 if (
tree[i]==OPER_ADD) {
1010 else if (
tree[i]==OPER_MUL) {
1014 else if (
tree[i]==OPER_COMMA) {
1018 else if (
tree[i]==SNUMBER) {
1021 PrtLong((UWORD *)&
tree[i+2], n, buf);
1022 int l = strlen((
char *)buf);
1025 PrtLong((UWORD *)&
tree[i+2+n], n, buf+l+1);
1026 MesPrint(
"%s%",buf);
1029 else if (
tree[i]==SYMBOL) {
1030 if (AN.poly_vars[
tree[i+2]] < 10000)
1031 MesPrint(
"%s^%d%", VARNAME(symbols,AN.poly_vars[
tree[i+2]]),
tree[i+3]);
1033 MesPrint(
"Z%d^%d%", MAXVARIABLES-AN.poly_vars[
tree[i+2]],
tree[i+3]);
1054 size_t operator()(
const vector<WORD>& n)
const {
1060 bool operator()(
const vector<WORD>& lhs,
const vector<WORD>& rhs)
const {
1061 if (lhs.size() != rhs.size())
return false;
1062 for (
unsigned int i = 1; i < lhs.size(); i++) {
1063 if (lhs[i] != rhs[i])
return false;
1070template<
typename T>
size_t hash_range(T* array,
int size) {
1073 for (
int i = 0; i < size; i++) {
1074 hash ^= array[i] + 0x9e3779b9 + (hash << 6) + (hash >> 2);
1134 MesPrint (
"*** [%s, w=%w] CALL: generate_instructions(cse=%d)",
1135 thetime_str().c_str(), do_CSE?1:0);
1138 typedef unordered_map<vector<WORD>, int,
CSEHash,
CSEEq> csemap;
1144 ID.rehash(mcts_expr_score * 2);
1156 for (
int i=0; i<(int)
tree.size();) {
1159 if (
tree[i]==SNUMBER) {
1160 WORD hash = hash_range(&
tree[i],
tree[i + 1]);
1162 x.push_back(SNUMBER);
1164 int sign = SGN(x.back());
1167 std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1169 s.push(sign * suc.first->second);
1174 else if (
tree[i]==SYMBOL) {
1175 WORD hash = hash_range(&
tree[i],
tree[i + 1]);
1178 x.push_back(SYMBOL);
1179 x.push_back(
tree[i+2]+1);
1180 x.push_back(
tree[i+3]);
1182 csemap::iterator it = ID.find(x);
1183 if (do_CSE && it != ID.end()) {
1187 s.push(EXTRASYMBOL);
1190 if (numinstr == MAXPOSITIVE) {
1191 MesPrint((
char *)
"ERROR: too many temporary variables needed in optimization");
1196 instr.push_back(numinstr);
1197 instr.push_back(OPER_MUL);
1198 instr.push_back(9+OPTHEAD);
1200 instr.push_back(SYMBOL);
1202 instr.push_back(
tree[i+2]);
1203 instr.push_back(
tree[i+3]);
1212 s.push(EXTRASYMBOL);
1218 s.push(
tree[i+2]+1);
1232 hash.push_back(oper);
1235 for (
int operand=0; operand<2; operand++) {
1236 hash.push_back(s.top()); s.pop();
1237 x.push_back(s.top()); s.pop();
1238 x.push_back(s.top()); s.pop();
1239 x.push_back(s.top()); s.pop();
1242 x[0] = hash_range(&hash[0], 3);
1245 if (oper==OPER_MUL) {
1246 bool do_continue =
false;
1248 for (
int operand=0; operand<2; operand++) {
1249 int idx_oper1 = operand==0 ? 2 : 5;
1250 int idx_oper2 = operand==0 ? 5 : 2;
1253 if (x[idx_oper1]==SNUMBER) {
1255 int idx = ABS(x[idx_oper1+1])-1;
1256 if (
tree[idx+2]==1 &&
tree[idx+3]==1 && ABS(
tree[idx+4])==3) {
1258 s.push(x[idx_oper2+2]);
1259 s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1260 s.push(x[idx_oper2]);
1261 s.push(hash[1 + (operand + 1) % 2]);
1268 if (do_continue)
continue;
1273 csemap::iterator it = ID.find(x);
1274 if (!do_CSE || it == ID.end()) {
1275 if (numinstr == MAXPOSITIVE) {
1276 MesPrint((
char *)
"ERROR: too many temporary variables needed in optimization");
1280 instr.push_back(numinstr);
1281 instr.push_back(x[1]);
1286 int lenidx = instr.size()-1;
1288 for (
int j=0; j<2; j++)
1289 if (x[3*j+2]==SYMBOL || x[3*j+2]==EXTRASYMBOL) {
1291 instr.push_back(x[3*j+2]);
1293 instr.push_back(ABS(x[3*j+3])-1);
1294 instr.push_back(x[3*j+4]);
1297 instr.push_back(3*SGN(x[3*j+3]));
1301 int t = ABS(x[3*j+3])-1;
1302 instr.push_back(
tree[t+1]-1);
1303 instr.insert(instr.end(), &
tree[t+2], &
tree[t]+
tree[t+1]);
1304 instr.back() *= SGN(instr.back()) * SGN(x[3*j+3]);
1305 instr[lenidx] +=
tree[t+1]-1;
1315 s.push(EXTRASYMBOL);
1321 MesPrint (
"*** [%s, w=%w] DONE: generate_instructions(cse=%d) : numoper=%d",
1344 typedef unordered_map<vector<WORD>, int,
CSEHash,
CSEEq> csemap;
1349 ID.rehash(mcts_expr_score * 2);
1355 int numinstr = 0, numcommas = 0;
1359 for (
int i=0; i<(int)
tree.size();) {
1362 if (
tree[i]==SNUMBER) {
1363 WORD hash = hash_range(&
tree[i],
tree[i + 1]);
1365 x.push_back(SNUMBER);
1367 int sign = SGN(x.back());
1370 std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1372 s.push(sign * suc.first->second);
1377 else if (
tree[i]==SYMBOL) {
1378 WORD hash = hash_range(&
tree[i],
tree[i + 1]);
1381 x.push_back(SYMBOL);
1382 x.push_back(
tree[i+2]+1);
1383 x.push_back(
tree[i+3]);
1385 csemap::iterator it = ID.find(x);
1386 if (it != ID.end()) {
1390 s.push(EXTRASYMBOL);
1392 if (
tree[i + 3] == 2)
1395 numinstr += (int)floor(log(
tree[i+3])/log(2.0)) + popcount(
tree[i+3]) - 1;
1400 s.push(EXTRASYMBOL);
1406 s.push(
tree[i+2]+1);
1421 hash.push_back(oper);
1424 for (
int operand=0; operand<2; operand++) {
1425 hash.push_back(s.top()); s.pop();
1426 x.push_back(s.top()); s.pop();
1427 x.push_back(s.top()); s.pop();
1428 x.push_back(s.top()); s.pop();
1432 x[0] = hash_range(&hash[0], 3);
1435 if (oper==OPER_MUL) {
1436 bool do_continue =
false;
1438 for (
int operand=0; operand<2; operand++) {
1439 int idx_oper1 = operand==0 ? 2 : 5;
1440 int idx_oper2 = operand==0 ? 5 : 2;
1443 if (x[idx_oper1]==SNUMBER) {
1445 int idx = ABS(x[idx_oper1+1])-1;
1446 if (
tree[idx+2]==1 &&
tree[idx+3]==1 && ABS(
tree[idx+4])==3) {
1448 s.push(x[idx_oper2+2]);
1449 s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1450 s.push(x[idx_oper2]);
1451 s.push(hash[1 + (operand + 1) % 2]);
1458 if (do_continue)
continue;
1463 csemap::iterator it = ID.find(x);
1464 if (it == ID.end()) {
1465 if (numinstr == MAXPOSITIVE) {
1466 MesPrint((
char *)
"ERROR: too many temporary variables needed in optimization");
1470 if (oper == OPER_COMMA) numcommas++;
1474 s.push(EXTRASYMBOL);
1479 s.push(EXTRASYMBOL);
1487 return numinstr - numcommas;
1502 node() : l(NULL), r(NULL), sign(1), hash(0) {};
1503 node(
const WORD* data) : data(data), l(NULL), r(NULL), sign(1), hash(0) {};
1507 int cmp(
const struct node* rhs)
const {
1508 if (
this == rhs)
return 0;
1510 if (data[0] != rhs->data[0])
return data[0] < rhs->data[0] ? -1 : 1;
1511 int mod = data[0] == SNUMBER ? -1 : 0;
1512 if (data[0] == SYMBOL || data[0] == SNUMBER) {
1513 for (
int i = 0; i < data[1] + mod; i++) {
1514 if (data[i] != rhs->data[i])
return data[i] < rhs->data[i] ? -1 : 1;
1517 int lv = l->cmp(rhs->l);
1518 if (lv != 0)
return lv;
1519 int rv = r->cmp(rhs->r);
1520 if (rv != 0)
return rv;
1523 if (l->sign != rhs->l->sign)
return l->sign < rhs->l->sign ? -1 : 1;
1524 if (r->sign != rhs->r->sign)
return r->sign < rhs->r->sign ? -1 : 1;
1531 bool operator() (
const struct node* lhs,
const struct node* rhs)
const
1533 return lhs->cmp(rhs) < 0;
1537 int mod = data[0] == SNUMBER ? -1 : 0;
1538 if (data[0] == SYMBOL || data[0] == SNUMBER) {
1539 hash = hash_range(data, data[1] + mod);
1541 if (l->hash == 0) l->calcHash();
1542 if (r->hash == 0) r->calcHash();
1545 size_t newr[] = {(size_t)data[0], l->hash, (
size_t)l->sign, r->hash, (size_t)r->sign};
1546 hash = hash_range(newr, 5);
1552 size_t operator()(
const NODE* n)
const {
1558 bool operator()(
const NODE* lhs,
const NODE* rhs)
const {
1559 return lhs->cmp(rhs) == 0;
1563NODE* buildTree(vector<WORD> &
tree) {
1569 unsigned int curIndex = 0;
1572 for (
int i=0; i<(int)
tree.size();) {
1577 if (
tree[i]==SYMBOL ||
tree[i] == SNUMBER) {
1579 if (
tree[i] == SNUMBER) {
1580 c->sign = SGN(
tree[i +
tree[i + 1] -1]);
1587 c->r = st.top(); st.pop();
1588 c->l = st.top(); st.pop();
1592 if (c->data[0] == OPER_MUL) {
1593 NODE* ch[] = {c->r, c->l};
1594 for (
int j = 0; j < 2; j++)
1595 if (ch[j]->data[0] == SNUMBER && ch[j]->data[1] == 5 && ch[j]->data[2]==1 && ch[j]->data[3]==1) {
1596 ch[(j+1)%2]->sign *= ch[j]->sign;
1616 for (
unsigned int i = 0; i < curIndex; i++) {
1617 if (ar[i].l == ar) ar[i].l = st.top();
1618 if (ar[i].r == ar) ar[i].r = st.top();
1621 swap(ar[0], *st.top());
1625int count_operators_cse_topdown (vector<WORD> &
tree) {
1626 typedef unordered_set<NODE*, NodeHash, NodeEq> nodeset;
1631 ID.rehash(mcts_expr_score * 2);
1639 while (!stack.empty())
1641 NODE* c = stack.top();
1644 if (c->data[0] == SYMBOL) {
1645 if (c->data[3] > 1) {
1646 std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1648 if (c->data[3] == 2)
1651 numinstr += (int)floor(log(c->data[3])/log(2.0)) + popcount(c->data[3]) - 1;
1655 if (c->data[0] != SNUMBER) {
1657 std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1663 if (c->data[0] == OPER_MUL || c->data[0] == OPER_ADD)
1671 M_free(root,
"CSE tree");
1680vector<WORD> simulated_annealing() {
1681 float minT = AO.Optimize.saMinT.fval;
1682 float maxT = AO.Optimize.saMaxT.fval;
1684 float coolrate = pow(minT / maxT, 1 / (
float)AO.Optimize.saIter);
1691 if ((state.size() > 0 && state[0] == SEPARATESYMBOL) || (state.size() > 1 && state[1] == FACTORSYMBOL)) startindex++;
1692 if (state.size() > 1 && state[1] == FACTORSYMBOL) startindex++;
1697 int curscore = count_operators_cse_topdown(
tree);
1700 AN.poly_num_vars = 0;
1701 M_free(AN.poly_vars,
"poly_vars");
1703 std::vector<WORD> best = state;
1704 int bestscore = curscore;
1706 if (startindex == (
int)state.size() || (
int)state.size() - startindex < 2) {
1710 for (
int o = 0; o < AO.Optimize.saIter; o++) {
1711 int inda = iranf(BHEAD state.size() - startindex) + startindex;
1712 int indb = iranf(BHEAD state.size() - startindex) + startindex;
1718 swap(state[inda], state[indb]);
1721 int newscore = count_operators_cse_topdown(
tree);
1724 AN.poly_num_vars = 0;
1725 M_free(AN.poly_vars,
"poly_vars");
1727 if (newscore <= curscore || 2.0 * wranf(BHEAD0) / (
float)(UWORD)(-1) < exp((curscore - newscore) / T)) {
1728 curscore = newscore;
1730 if (curscore < bestscore) {
1731 bestscore = curscore;
1735 swap(state[inda], state[indb]);
1739 MesPrint(
"Score at step %d: %d", o, curscore);
1745 MesPrint(
"Simulated annealing score: %d", bestscore);
1827inline static void next_MCTS_scheme (PHEAD vector<WORD> *porder, vector<WORD> *pscheme, vector<tree_node *> *ppath) {
1829 vector<WORD> &order = *porder;
1830 vector<WORD> &schemev = *pscheme;
1831 vector<tree_node *> &path = *ppath;
1832 int depth = 0, nchild0;
1833 float slide_down_factor = 1.0;
1840 path.push_back(select);
1841 nchild0 = select->childs.size();
1842 while (select->childs.size() > 0) {
1844 select->num_visits++;
1845 select->sum_results+=mcts_expr_score;
1848 switch ( AO.Optimize.mctsdecaymode ) {
1850 slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1853 if ( 2*AT.optimtimes < AO.Optimize.mctsnumexpand ) {
1854 slide_down_factor = 1.0*(AO.Optimize.mctsnumexpand-2*AT.optimtimes);
1855 slide_down_factor /= 1.0*AO.Optimize.mctsnumexpand;
1858 slide_down_factor = 0.0001;
1862 float dd = 1.0-(1.0*depth)/(1.0*nchild0);
1863 slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1864 if ( dd <= 0.000001 ) slide_down_factor = 1.0;
1865 else slide_down_factor /= dd;
1866 if ( slide_down_factor > 1.0 ) slide_down_factor = 1.0;
1872 MesPrint(
"select %d",select->var);
1878 for (vector<tree_node>::iterator p=select->childs.begin(); p<select->childs.end(); p++) {
1880 if (p->num_visits >= 1) {
1883 score = mcts_expr_score / (p->sum_results/p->num_visits) +
1887 2 * AO.Optimize.mctsconstant.fval * sqrt(2*log(select->num_visits) / p->num_visits);
1890 printf(
"%d: %.2lf [x=%.2lf n=%d fin=%i]\n",p->var,score,mcts_expr_score / (p->sum_results/p->num_visits),
1891 p->num_visits,p->finished?1:0);
1900 printf(
"%d: inf\n",p->var); fflush(stdout);
1905 if (!p->finished && score>best) {
1913 select->finished=
true;
1919 path.push_back(select);
1920 order.push_back(select->var);
1927 MesPrint(
"expand %d",select->var);
1933 for (
int i=0; i<(int)order.size(); i++)
1934 var_used.insert(ABS(order[i])-1);
1937 if (!select->finished && select->childs.size()==0) {
1939 int sign = (order.size() == 0) ? 1 : SGN(order.back());
1940 for (
int i=0; i<(int)mcts_vars.size(); i++)
1941 if (!var_used.count(mcts_vars[i])) {
1942 new_node.childs.push_back(
tree_node(sign*(mcts_vars[i]+1)));
1943 if (AO.Optimize.hornerdirection==O_FORWARDANDBACKWARD)
1944 new_node.childs.push_back(
tree_node(-sign*(mcts_vars[i]+1)));
1950 LOCK(optimize_lock);
1952 UNLOCK(optimize_lock);
1955 if (select->childs.size()==0)
1956 select->finished =
true;
1959 select->num_visits++;
1960 select->sum_results+=mcts_expr_score;
1967 for (
int i=0; i<(int)mcts_vars.size(); i++)
1968 if (!var_used.count(mcts_vars[i]))
1969 scheme.push_back(mcts_vars[i]);
1972 for (
int i=(
int)order.size()-1; i>=0; i--) {
1974 scheme.push_front(order[i]-1);
1976 scheme.push_back(-order[i]-1);
1980 if (mcts_factorized)
1981 scheme.push_front(FACTORSYMBOL);
1983 scheme.push_front(SEPARATESYMBOL);
1986 schemev = vector<WORD>(scheme.begin(), scheme.end());
1996inline static void try_MCTS_scheme (PHEAD
const vector<WORD> &scheme,
int *pnum_oper) {
2006 int num_oper = count_operators_cse_topdown(
tree);
2009 AN.poly_num_vars = 0;
2010 M_free(AN.poly_vars,
"poly_vars");
2012 *pnum_oper = num_oper;
2022inline static void update_MCTS_scheme (
int num_oper,
const vector<WORD> &scheme, vector<tree_node *> *ppath) {
2024 vector<tree_node *> &path = *ppath;
2027 if ((
int)mcts_best_schemes.size() < AO.Optimize.mctsnumkeep ||
2028 (--mcts_best_schemes.end())->first > num_oper) {
2033 LOCK(optimize_lock);
2034 mcts_best_schemes.insert(make_pair(num_oper,scheme));
2035 if ((
int)mcts_best_schemes.size() > AO.Optimize.mctsnumkeep)
2036 mcts_best_schemes.erase(--mcts_best_schemes.end());
2037 UNLOCK(optimize_lock);
2044 for (vector<tree_node *>::iterator p=path.begin(); p<path.end(); p++)
2045 (*p)->sum_results += num_oper - mcts_expr_score;
2053void find_Horner_MCTS_expand_tree () {
2056 MesPrint (
"*** [%s, w=%w] CALL: find_Horner_MCTS_expand_tree", thetime_str().c_str());
2066 vector<WORD> scheme;
2069 vector<tree_node *> path;
2074 next_MCTS_scheme(BHEAD &order, &scheme, &path);
2075 try_MCTS_scheme(BHEAD scheme, &num_oper);
2078 MesPrint (
"{%a} -> {%a} -> %d", order.size(), &order[0], scheme.size(), &scheme[0], num_oper);
2080 update_MCTS_scheme(num_oper, scheme, &path);
2083 MesPrint (
"*** [%s, w=%w] DONE: find_Horner_MCTS_expand_tree(%a-> %d)",
2084 thetime_str().c_str(), scheme.size(), &scheme[0], num_oper);
2098vector<pair<vector<WORD>, vector<tree_node *> > > PF_opt_MCTS_tasks;
2101void PF_find_Horner_MCTS_expand_tree_master_init () {
2103 PF_opt_MCTS_tasks.resize(PF.numtasks);
2104 for (
int i = 1; i < PF.numtasks; i++) {
2105 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[i];
2113int PF_find_Horner_MCTS_expand_tree_master_next () {
2117 PF_Receive(PF_ANY_SOURCE, PF_OPT_MCTS_MSGTAG, &next, NULL);
2120 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2121 if (!p.first.empty()) {
2125 update_MCTS_scheme(num_oper, p.first, &p.second);
2137void PF_find_Horner_MCTS_expand_tree_master () {
2140 MesPrint (
"*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2144 vector<WORD> scheme;
2145 vector<tree_node *> path;
2147 next_MCTS_scheme(BHEAD &order, &scheme, &path);
2150 int next = PF_find_Horner_MCTS_expand_tree_master_next();
2154 int len = scheme.size();
2160 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2165 MesPrint (
"*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2171void PF_find_Horner_MCTS_expand_tree_master_wait () {
2174 MesPrint (
"*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2178 for (
int i = 1; i < PF.numtasks; i++) {
2179 int next = PF_find_Horner_MCTS_expand_tree_master_next();
2188 MesPrint (
"*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2194void PF_find_Horner_MCTS_expand_tree_slave () {
2197 MesPrint (
"*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2200 vector<WORD> scheme;
2207 PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2219 if (len == 0)
break;
2225 try_MCTS_scheme(scheme, &num_oper);
2229 PF_Pack(&num_oper, 1, PF_INT);
2230 PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2234 MesPrint (
"*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2257 if (PF.me != MASTER) {
2258 if (PF.numtasks <= 1)
return;
2259 PF_find_Horner_MCTS_expand_tree_slave();
2265 MesPrint (
"*** [%s, w=%w] CALL: find_Horner_MCTS", thetime_str().c_str());
2278 for (WORD *t=optimize_expr; *t!=0; t+=*t)
2280 for (
int i=3; i<t[2]; i+=2)
2281 var_set.insert(t[i]);
2285 mcts_factorized = var_set.count(FACTORSYMBOL);
2286 if (mcts_factorized) var_set.erase(FACTORSYMBOL);
2287 mcts_separated = var_set.count(SEPARATESYMBOL);
2288 if (mcts_separated) var_set.erase(SEPARATESYMBOL);
2290 mcts_vars = vector<WORD>(var_set.begin(), var_set.end());
2291 optimize_num_vars = (int)mcts_vars.size();
2293 for (
int i=0; i<(int)mcts_vars.size(); i++) {
2294 if (AO.Optimize.hornerdirection != O_BACKWARD)
2295 mcts_root.childs.push_back(
tree_node(+(mcts_vars[i]+1)));
2296 if (AO.Optimize.hornerdirection != O_FORWARD)
2297 mcts_root.childs.push_back(
tree_node(-(mcts_vars[i]+1)));
2302 PF_find_Horner_MCTS_expand_tree_master_init();
2310 for (
int times=0; times<AO.Optimize.mctsnumexpand && !mcts_root.finished &&
2311 (AO.Optimize.mctstimelimit==0 || (
TimeWallClock(1)-start_time)/100 < AO.Optimize.mctstimelimit);
2313 AT.optimtimes = times;
2315#if defined(WITHPTHREADS)
2316 if (AM.totalnumberofthreads > 1)
2317 find_Horner_MCTS_expand_tree_threaded();
2319#elif defined(WITHMPI)
2320 if (PF.numtasks > 1)
2321 PF_find_Horner_MCTS_expand_tree_master();
2324 find_Horner_MCTS_expand_tree();
2332 PF_find_Horner_MCTS_expand_tree_master_wait();
2335 MesPrint (
"*** [%s, w=%w] DONE: find_Horner_MCTS", thetime_str().c_str());
2405 MesPrint (
"*** [%s, w=%w] CALL: merge_operators", thetime_str().c_str());
2409 vector<const WORD *> instr;
2410 const WORD *tbegin = &*all_instr.begin();
2411 const WORD *tend = tbegin+all_instr.size();
2413 for (
const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
2416 int n = instr.size();
2419 vector<int> par(n), numpar(n,0);
2420 for (
int i=0; i<n; i++) par[i]=i;
2422 for (
int i=0; i<n; i++) {
2423 for (
const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) {
2424 if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) ) {
2431 if (*(t+*t-3)!=1 || *(t+*t-2)!=1 || ABS(*(t+*t-1))!=3) numpar[*(t+3)]++;
2432 if (*(t+4)>1) numpar[*(t+3)]++;
2439 vector<bool> vis(n,
false);
2441 while (!s.empty()) {
2443 int i=s.top(); s.pop();
2444 if (vis[i])
continue;
2447 for (
const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t)
2448 if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) )
2452 if (numpar[i]==1 && *(instr[i]+1)==*(instr[par[i]]+1))
2453 par[i] = par[par[i]];
2459 vector<WORD> newinstr;
2462 stack<WORD> new_expr;
2464 vis = vector<bool>(n,
false);
2467 vector<bool> skip(n,
false), skipcoeff(n,
false);
2468 for (
int i=0; i<n; i++)
2469 if (*(instr[i]+OPTHEAD) == 0) skip[i]=skipcoeff[i]=
true;
2472 vector<int> renum_par(n);
2473 for (
int i=0; i<n; i++)
2476 while (!new_expr.empty()) {
2477 int x = new_expr.top(); new_expr.pop();
2478 if (vis[x])
continue;
2483 bool first_copy=
true;
2484 int lenidx = newinstr.size()+2;
2487 stack<WORD> this_expr;
2488 this_expr.push(x+1);
2490 while (!this_expr.empty()) {
2492 int i = this_expr.top(); this_expr.pop();
2495 for (
const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) {
2500 bool copy = !skip[i];
2501 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
2502 if (par[*(t+3)] == x) {
2505 this_expr.push(sign * (skip[i]||skipcoeff[i] ? 1 : SGN(*(t+*t-1))) * (1+*(t+3)));
2506 if (*(instr[i]+1) == OPER_MUL) sign=1;
2510 new_expr.push(*(t+3));
2514 if (*t == 1+ABS(*(t+*t-1)) && skipcoeff[i]) {
2521 newinstr.push_back(renum_par[x]);
2522 newinstr.push_back(*(instr[x]+1));
2523 newinstr.push_back(3);
2528 int thislenidx = newinstr.size();
2529 newinstr.insert(newinstr.end(), t, t+*t);
2530 newinstr.back() *= sign;
2531 if (*(instr[i]+1) == OPER_MUL) sign=1;
2532 newinstr[lenidx] += *t;
2536 if (move_coeff && *t!=1+ABS(*(t+*t-1)) && *(instr[i]+1)!=OPER_COMMA &&
2537 *(t+1)==EXTRASYMBOL && numpar[*(t+3)]==1 && *(instr[*(t+3)]+1)==OPER_MUL) {
2540 const WORD *t1 = instr[*(t+3)]+OPTHEAD;
2541 const WORD *t2 = t1+*t1;
2543 if (*t1 == 1+ABS(*(t1+*t1-1))) {
2547 WORD *t3 = &*newinstr.end();
2548 int sign2 = SGN(t3[-1]);
2549 newinstr.erase(newinstr.end()-3, newinstr.end());
2552 for (
const WORD *tt=t1; *tt!=0; tt+=*tt) {
2555 if (numargs==2 && *(t2+4)==1) {
2557 newinstr[newinstr.size()-4] = *(t2+1);
2558 newinstr[newinstr.size()-3] = *(t2+2);
2559 newinstr[newinstr.size()-2] = *(t2+3);
2560 newinstr[newinstr.size()-1] = *(t2+4);
2561 sign2 *= SGN(*(t2+*t2-1));
2565 if (*(t2+1)==EXTRASYMBOL)
2566 renum_par[*(t+3)] = *(t2+3);
2573 if ( numargs > 2 || ( numargs == 2 && t2[4] > 1 ) ) {
2574 for (WORD *tt=(WORD *)t2; *tt!=0; tt+=*tt) {
2575 if ( tt[*tt-1] < 0 ) {
2576 tt[*tt-1] = -tt[*tt-1];
2581 skipcoeff[*(t+3)]=
true;
2585 newinstr.insert(newinstr.end(), t1+1, t1+*t1);
2586 newinstr.back() *= sign2;
2587 newinstr[thislenidx] += ABS(newinstr.back()) - 3;
2588 newinstr[lenidx] += ABS(newinstr.back()) - 3;
2597 newinstr.push_back(0);
2607 vector<int> renum(n,-1);
2609 for (
int i=0; i<n; i++)
2610 if (!skip[i] && renum_par[par[i]]==i) renum[renum_par[i]]=next++;
2613 tbegin = &*newinstr.begin();
2614 tend = tbegin+newinstr.size();
2615 for (
const WORD *t=tbegin; t!=tend; t+=*(t+2))
2616 instr[renum[*t]] = t;
2620 vector<WORD> sortinstr;
2621 for (
int i=0; i<next; i++) {
2622 int idx = sortinstr.size();
2623 sortinstr.insert(sortinstr.end(), instr[i], instr[i]+*(instr[i]+2));
2625 sortinstr[idx] = renum[sortinstr[idx]];
2628 for (WORD *t2=&sortinstr[idx]+3; *t2!=0; t2+=*t2)
2629 if (*t2!=1+ABS(*(t2+*t2-1)) && *(t2+1)==EXTRASYMBOL)
2630 *(t2+3) = renum[*(t2+3)];
2634 MesPrint (
"*** [%s, w=%w] DONE: merge_operators", thetime_str().c_str());
2671 int type, arg1, arg2, improve;
2673 vector<int> eqnidxs;
2676 if (arg1 != a.arg1)
return arg1 < a.arg1;
2677 if (arg2 != a.arg2)
return arg2 < a.arg2;
2678 if (type != a.type)
return type < a.type;
2679 return coeff < a.coeff;
2700 MesPrint (
"*** [%s, w=%w] CALL: find_optimizations", thetime_str().c_str());
2705 vector<optimization> res;
2709 map<optimization, vector<int> > cnt;
2712 map<vector<int>,
int> idx_coeff;
2714 const WORD *ebegin = &*instr.begin();
2715 const WORD *eend = ebegin+instr.size();
2717 for (
int optim_type=0; optim_type<=4; optim_type++) {
2723 optim.type = optim_type;
2727 if (optim_type == 0) {
2728 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2729 if (*(e+1) != OPER_MUL)
continue;
2730 for (
const WORD *t=e+3; *t!=0; t+=*t) {
2731 if (*t == ABS(*(t+*t-1))+1)
continue;
2733 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2734 optim.arg2 = *(t+4);
2735 cnt[optim].push_back(e-ebegin);
2742 if (optim_type == 1) {
2743 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2744 if (*(e+1) != OPER_MUL)
continue;
2746 for (
const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2747 if (*t1 == ABS(*(t1+*t1-1))+1)
continue;
2748 int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2750 for (
const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2751 if (*t2 == ABS(*(t2+*t2-1))+1)
continue;
2752 int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2754 if (*(t1+4) == *(t2+4)) {
2757 if (optim.arg1 > optim.arg2)
2758 swap(optim.arg1, optim.arg2);
2761 cnt[optim].push_back(e-ebegin);
2764 cnt[optim].push_back(e-ebegin);
2765 cnt[optim].push_back(e-ebegin);
2774 if (optim_type == 2) {
2775 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2777 if (*(e+1) == OPER_ADD) {
2780 for (
const WORD *t=e+3; *t!=0; t+=*t) {
2781 if (*t == ABS(*(t+*t-1))+1)
continue;
2784 if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1)
continue;
2786 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2787 optim.coeff.back() = ABS(optim.coeff.back());
2789 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2792 cnt[optim].push_back(e-ebegin);
2796 else if (*(e+1) == OPER_MUL) {
2798 optim.coeff.clear();
2800 for (
const WORD *t=e+3; *t!=0; t+=*t)
2801 if (*t == ABS(*(t+*t-1))+1) {
2802 if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1)
continue;
2803 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2804 optim.coeff.back() = ABS(optim.coeff.back());
2807 if (!optim.coeff.empty())
2808 for (
const WORD *t=e+3; *t!=0; t+=*t) {
2809 if (*t == ABS(*(t+*t-1))+1)
continue;
2810 if (*(t+4) != 1)
continue;
2811 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2813 cnt[optim].push_back(e-ebegin);
2820 if (optim_type == 3) {
2821 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2823 if (*(e+1) != OPER_ADD)
continue;
2825 optim.coeff.clear();
2827 for (
const WORD *t=e+3; *t!=0; t+=*t)
2828 if (*t == ABS(*(t+*t-1))+1) {
2829 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2832 if (!optim.coeff.empty()) {
2833 for (
const WORD *t=e+3; *t!=0; t+=*t) {
2834 if (*t == ABS(*(t+*t-1))+1)
continue;
2835 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1)
continue;
2836 if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2838 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2840 cnt[optim].push_back(e-ebegin);
2841 if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2848 if (optim_type == 4) {
2849 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2850 if (*(e+1) != OPER_ADD)
continue;
2852 for (
const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2853 if (*t1 == ABS(*(t1+*t1-1))+1)
continue;
2854 int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2856 for (
const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2857 if (*t2 == ABS(*(t2+*t2-1))+1)
continue;
2858 int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2860 int sign1 = SGN(*(t1+*t1-1));
2861 int sign2 = SGN(*(t2+*t2-1));
2863 if (BigLong((UWORD *)t1+5, ABS(*(t1+*t1-1))-1, (UWORD *)t2+5, ABS(*(t2+*t2-1))-1) == 0) {
2865 optim.type = (sign1 * sign2 == 1 ? 4 : 5);
2868 if (optim.arg1 > optim.arg2) {
2869 swap(optim.arg1, optim.arg2);
2872 if (ABS(*(t1+*t1-1))==3 && *(t1+*t1-2)==1 && *(t1+*t1-3)==1)
2873 cnt[optim].push_back(e-ebegin);
2876 cnt[optim].push_back(e-ebegin);
2877 cnt[optim].push_back(e-ebegin);
2888 for (map<
optimization, vector<int> >::iterator i=cnt.begin(); i!=cnt.end(); i++) {
2889 int improve = i->second.size() - 1;
2891 res.push_back(i->first);
2892 res.back().improve = improve;
2893 res.back().eqnidxs = i->second;
2896 res.back().eqnidxs.erase(unique(res.back().eqnidxs.begin(), res.back().eqnidxs.end()), res.back().eqnidxs.end());
2903 MesPrint (
"*** [%s, w=%w] DONE: find_optimizations",thetime_str().c_str());
2935 MesPrint (
"*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d^%d)",
2936 thetime_str().c_str(), optim.improve,
2937 optim.arg1>0?
'x':
'Z', ABS(optim.arg1)-1, optim.arg2);
2938 else if (optim.type==1 || optim.type>=4)
2939 MesPrint (
"*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%c%d)",
2940 thetime_str().c_str(), optim.improve,
2941 optim.arg1>0?
'x':
'Z', ABS(optim.arg1)-1,
2942 optim.type==1 ?
'*' : optim.type==4 ?
'+' :
'-',
2943 optim.arg2>0?
'x':
'Z', ABS(optim.arg2)-1);
2945 WORD n = optim.coeff.back()/2;
2946 UBYTE num[BITSINWORD*ABS(n)], den[BITSINWORD*ABS(n)];
2947 PrtLong((UWORD *)&optim.coeff[0], n, num);
2948 PrtLong((UWORD *)&optim.coeff[ABS(n)], ABS(n), den);
2949 MesPrint (
"*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%s/%s)",
2950 thetime_str().c_str(), optim.improve,
2951 optim.arg1>0?
'x':
'Z', ABS(optim.arg1)-1,
2952 optim.type==2 ?
'*' :
'+', num,den);
2957 bool substituted =
false;
2958 WORD *ebegin = &*instr.begin();
2961 if (optim.type == 0) {
2963 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
2964 int varnumx = ABS(optim.arg1) - 1;
2967 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
2968 WORD *e = ebegin + optim.eqnidxs[i];
2969 if (*(e+1) != OPER_MUL)
continue;
2972 for (WORD *t=e+3; *t!=0; t+=*t) {
2973 if (*t == ABS(*(t+*t-1))+1)
continue;
2974 if (*(t+1)==vartypex &&
2979 *(t+1) = EXTRASYMBOL;
2990 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
2996 instr.push_back(newid);
2997 instr.push_back(OPER_MUL);
2998 instr.push_back(12);
3000 instr.push_back(vartypex);
3002 instr.push_back(varnumx);
3011 if (optim.type == 1) {
3013 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3014 int varnumx = ABS(optim.arg1) - 1;
3015 int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
3016 int varnumy = ABS(optim.arg2) - 1;
3018 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3019 WORD *e = ebegin + optim.eqnidxs[i];
3020 if (*(e+1) != OPER_MUL)
continue;
3024 for (WORD *t=e+3; *t!=0; t+=*t) {
3025 if (*t == ABS(*(t+*t-1))+1)
continue;
3026 if (*(t+1)==vartypex && *(t+3)==varnumx) powx = *(t+4);
3027 if (*(t+1)==vartypey && *(t+3)==varnumy) powy = *(t+4);
3031 if (powx>0 && powy>0 && powx==powy) {
3036 for (WORD *t=e+3; *t!=0;) {
3039 if (*t == ABS(*(t+*t-1))+1 ||
3040 (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
3041 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
3042 memmove(newt, t, *t*
sizeof(WORD));
3046 sign *= SGN(*(t+*t-1));
3053 *newt++ = EXTRASYMBOL;
3068 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3074 instr.push_back(newid);
3075 instr.push_back(OPER_MUL);
3076 instr.push_back(20);
3078 instr.push_back(vartypex);
3080 instr.push_back(varnumx);
3086 instr.push_back(vartypey);
3088 instr.push_back(varnumy);
3098 if (optim.type == 2) {
3099 int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3100 int varnum = ABS(optim.arg1) - 1;
3102 WORD ncoeff = optim.coeff.back();
3105 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3106 WORD *e = ebegin + optim.eqnidxs[i];
3108 if (*(e+1) == OPER_ADD) {
3110 for (WORD *t=e+3; *t!=0; t+=*t) {
3112 if (*t == ABS(*(t+*t-1))+1)
continue;
3114 if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1 &&
3115 BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3116 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3119 int sign = SGN(*(t+*t-1));
3122 while (*tend!=0) tend+=*tend;
3123 WORD nmove = tend - t - *t;
3124 memmove(t, t+*t, nmove*
sizeof(WORD));
3142 else if (*(e+1) == OPER_MUL) {
3144 bool coeff_match=
false, var_match=
false;
3148 for (WORD *t=e+3; *t!=0; t+=*t) {
3149 if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3150 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3152 sign *= SGN(*(t+*t-1));
3154 else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1) {
3156 sign *= SGN(*(t+*t-1));
3161 if (coeff_match && var_match) {
3165 for (WORD *t=e+3; *t!=0;) {
3169 if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3170 memmove(newt, t, dt*
sizeof(WORD));
3178 *newt++ = EXTRASYMBOL;
3194 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3200 instr.push_back(newid);
3201 instr.push_back(OPER_ADD);
3202 instr.push_back(9+ABS(ncoeff));
3203 instr.push_back(5+ABS(ncoeff));
3204 instr.push_back(vartype);
3206 instr.push_back(varnum);
3208 for (
int i=0; i<ABS(ncoeff); i++)
3209 instr.push_back(optim.coeff[i]);
3214 if (optim.type == 3) {
3215 int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3216 int varnum = ABS(optim.arg1) - 1;
3218 WORD ncoeff = optim.coeff.back();
3221 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3222 WORD *e = ebegin + optim.eqnidxs[i];
3224 if (*(e+1) != OPER_ADD)
continue;
3226 int coeff_match=0, var_match=0;
3228 for (WORD *t=e+3; *t!=0; t+=*t) {
3229 if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ABS(ncoeff)-1,
3230 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0)
3231 coeff_match = SGN(ncoeff) * SGN(*(t+*t-1));
3232 else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)
3233 var_match = SGN(*(t+7));
3237 if (coeff_match * var_match == 1) {
3241 for (WORD *t=e+3; *t!=0;) {
3243 if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3244 memmove(newt, t, dt*
sizeof(WORD));
3251 *newt++ = EXTRASYMBOL;
3257 *newt++ = 3*coeff_match;
3266 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3272 instr.push_back(newid);
3273 instr.push_back(OPER_ADD);
3274 instr.push_back(13+ABS(ncoeff));
3276 instr.push_back(vartype);
3278 instr.push_back(varnum);
3283 instr.push_back(ABS(ncoeff)+1);
3284 for (
int i=0; i<ABS(ncoeff); i++)
3285 instr.push_back(optim.coeff[i]);
3290 if (optim.type >= 4) {
3292 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3293 int varnumx = ABS(optim.arg1) - 1;
3294 int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
3295 int varnumy = ABS(optim.arg2) - 1;
3298 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3299 WORD *e = ebegin + optim.eqnidxs[i];
3301 if (*(e+1) != OPER_ADD)
continue;
3303 const WORD *coeffx=NULL, *coeffy=NULL;
3304 WORD ncoeffx=0,ncoeffy=0;
3307 for (WORD *t=e+3; *t!=0; t+=*t) {
3308 if (*t == ABS(*(t+*t-1))+1)
continue;
3309 if (*(t+1)==vartypex && *(t+3)==varnumx && *(t+4)==1) {
3311 ncoeffx = *(t+*t-1);
3313 if (*(t+1)==vartypey && *(t+3)==varnumy && *(t+4)==1) {
3315 ncoeffy = *(t+*t-1);
3321 if (SGN(ncoeffx) * SGN(ncoeffy) * (optim.type==4 ? 1 : -1) == 1) {
3323 if (BigLong((UWORD *)coeffx, ABS(ncoeffx)-1, (UWORD *)coeffy, ABS(ncoeffy)-1) == 0) {
3325 vector<WORD> coeff(coeffx, coeffx+ABS(ncoeffx));
3336 for (WORD *t=e+3; *t!=0;) {
3338 if (*t == ABS(*(t+*t-1))+1 ||
3339 (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
3340 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
3341 memmove(newt, t, dt*
sizeof(WORD));
3347 *newt++ = 5 + ABS(ncoeffx);
3348 *newt++ = EXTRASYMBOL;
3352 for (
int j=0; j<ABS(ncoeffx); j++)
3368 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3380 instr.push_back(newid);
3381 instr.push_back(OPER_ADD);
3382 instr.push_back(20);
3384 instr.push_back(vartypex);
3386 instr.push_back(varnumx);
3392 instr.push_back(vartypey);
3394 instr.push_back(varnumy);
3398 instr.push_back(3*(optim.type==4?1:-1));
3403 vector<int> renum(newid+1, 0);
3404 bool do_renum=
false;
3407 ebegin = &*instr.begin();
3409 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3410 WORD *e = ebegin + optim.eqnidxs[i];
3412 if (*t==0)
continue;
3413 if (*(t+*t)!=0)
continue;
3414 if (*t!=8)
continue;
3415 if (*(t+4)!=1)
continue;
3416 if (*(t+5)!=1 || *(t+6)!=1)
continue;
3419 renum[*e] = SGN(*(t+7)) * (*(t+3) + 1);
3430 WORD *eend = ebegin+instr.size();
3432 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3433 for (WORD *t=e+3; *t!=0; t+=*t) {
3434 if (*t == ABS(*(t+*t-1))+1)
continue;
3435 if (*(t+1)==EXTRASYMBOL && renum[*(t+3)]!=0) {
3436 *(t+*t-1) *= SGN(renum[*(t+3)]);
3437 *(t+3) = ABS(renum[*(t+3)]) - 1;
3445 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=true", thetime_str().c_str(), optim.improve);
3482 MesPrint (
"*** [%s, w=%w] CALL: partial_factorize (n=%d)", thetime_str().c_str(), n);
3488 vector<int> instr_idx(n);
3489 WORD *ebegin = &*instr.begin();
3490 WORD *eend = ebegin+instr.size();
3491 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3492 instr_idx[*e] = e - ebegin;
3501 WORD *numpar = (WORD *)Malloc1(nmax*
sizeof(WORD),
"numpar");
3502 for (
int i = 0; i < nmax; i++ ) numpar[i] = 0;
3504 for (WORD *e=ebegin; e!=eend; e+=*(e+2))
3505 for (WORD *t=e+3; *t!=0; t+=*t) {
3506 if (*t == ABS(*(t+*t-1))+1)
continue;
3507 if (*(t+1) == EXTRASYMBOL) numpar[*(t+3)]++;
3511 for (
int i=0; i<n; i++) {
3512 WORD *e = &*instr.begin() + instr_idx[i];
3513 if (*(e+1) != OPER_ADD)
continue;
3518 for (WORD *t=e+3; *t!=0; t+=*t) {
3519 if (*t==ABS(*(t+*t-1))+1)
continue;
3523 cnt[(*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1)]++;
3526 if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3527 WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3528 if (*(t2+1) != OPER_MUL)
continue;
3529 for (t2+=3; *t2!=0; t2+=*t2) {
3530 if (*t2 == ABS(*(t2+*t2-1))+1)
continue;
3532 cnt[(*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1)]++;
3539 for (map<WORD,WORD>::iterator it=cnt.begin(); it!=cnt.end(); it++)
3540 if (it->second > best) { x=it->first; best=it->second; }
3544 if (best>=2 && best>improve) {
3546 vector<WORD> new_eqn;
3547 new_eqn.push_back(n);
3548 new_eqn.push_back(OPER_ADD);
3549 new_eqn.push_back(0);
3553 for (WORD *t=e+3; *t!=0; t+=dt) {
3557 if (*t!=ABS(*(t+*t-1))+1) {
3561 WORD y = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1);
3563 new_eqn.push_back(*t-4);
3564 new_eqn.insert(new_eqn.end(), t+5, t+dt);
3570 if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3571 WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3572 if (*(t2+1) == OPER_MUL) {
3574 for (t2+=3; *t2!=0; t2+=*t2) {
3575 if (*t2 == ABS(*(t2+*t2-1))+1)
continue;
3576 WORD y = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1);
3578 if (y==x && *(t2+4)==1) {
3582 WORD sign = SGN(*(tend-1));
3583 while (*tend!=0) tend+=*tend;
3584 int dt2 = tend - (t2+*t2);
3585 memmove(t2, t2+*t2, (dt2+1)*
sizeof(WORD));
3594 int thisidx=new_eqn.size();
3595 new_eqn.insert(new_eqn.end(), t, t+dt);
3596 t2 = &*instr.begin() + instr_idx[*(t+3)] + 3;
3600 if (*t2 == ABS(*(t2+*t2-1))+1) {
3601 if (ABS(new_eqn[new_eqn.size()-1])==3 && new_eqn[new_eqn.size()-2]==1 && new_eqn[new_eqn.size()-3]==1) {
3603 WORD sign = SGN(new_eqn.back());
3604 new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3605 new_eqn.insert(new_eqn.end(), t2, t2+*t2);
3606 new_eqn.back() *= sign;
3612 UWORD *tmp = NumberMalloc(
"partial_factorize");
3614 MulRat(BHEAD (UWORD *)t2+*t2-ABS(*(t2+*t2-1)), *(t2+*t2-1),
3615 (UWORD *)&*(new_eqn.end()-ABS(new_eqn.back())), new_eqn.back(),
3617 new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3618 new_eqn.push_back(ABS(ntmp)+1);
3619 new_eqn.insert(new_eqn.end(), tmp, tmp+ABS(ntmp));
3620 NumberFree(tmp,
"partial_factorize");
3624 else if (*(t2+4)==1) {
3626 new_eqn.back() *= SGN(*(t2+*t2-1));
3627 new_eqn[thisidx+1] = *(t2+1);
3628 new_eqn[thisidx+2] = *(t2+2);
3629 new_eqn[thisidx+3] = *(t2+3);
3630 new_eqn[thisidx+4] = *(t2+4);
3641 memmove(newt, t, dt*
sizeof(WORD));
3647 new_eqn.push_back(0);
3648 new_eqn[2] = new_eqn.size();
3650 bool empty = newt == e+3;
3651 if ( n+1 >= nmax ) {
3652 int i, newnmax = nmax*2;
3653 WORD *newnumpar = (WORD *)Malloc1(newnmax*
sizeof(WORD),
"newnumpar");
3654 for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3655 for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3656 M_free(numpar,
"numpar");
3667 *newt++ = EXTRASYMBOL;
3678 instr_idx.push_back(instr.size());
3679 instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3683 new_eqn.push_back(n);
3684 new_eqn.push_back(OPER_MUL);
3685 new_eqn.push_back(20);
3686 new_eqn.push_back(8);
3690 new_eqn.push_back(SYMBOL);
3691 new_eqn.push_back(4);
3692 new_eqn.push_back(x-1);
3693 new_eqn.push_back(1);
3696 new_eqn.push_back(EXTRASYMBOL);
3697 new_eqn.push_back(4);
3698 new_eqn.push_back(-x-1);
3699 new_eqn.push_back(1);
3701 new_eqn.push_back(1);
3702 new_eqn.push_back(1);
3703 new_eqn.push_back(3);
3704 new_eqn.push_back(8);
3706 new_eqn.push_back(EXTRASYMBOL);
3707 new_eqn.push_back(4);
3708 new_eqn.push_back(n-1);
3709 new_eqn.push_back(1);
3710 new_eqn.push_back(1);
3711 new_eqn.push_back(1);
3712 new_eqn.push_back(3);
3713 new_eqn.push_back(0);
3717 instr_idx.push_back(instr.size());
3718 instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3719 if ( n+1 >= nmax ) {
3720 int i, newnmax = nmax*2;
3721 WORD *newnumpar = (WORD *)Malloc1(newnmax*
sizeof(WORD),
"newnumpar");
3722 for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3723 for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3724 M_free(numpar,
"numpar");
3733 e = &*instr.begin() + instr_idx[i];
3735 memcpy(e+3, &new_eqn[3], (new_eqn.size()-3)*
sizeof(WORD));
3744 MesPrint (
"*** [%s, w=%w] DONE: partial_factorize (n=%d)", thetime_str().c_str(), n);
3746 M_free(numpar,
"numpar");
3777 MesPrint (
"*** [%s, w=%w] CALL: optimize_greedy(numoper=%d)",
3778 thetime_str().c_str(), old_num_oper);
3783 WORD *ebegin = &*instr.begin();
3784 WORD *eend = ebegin+instr.size();
3787 int final_eqn_idx = 0;
3790 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3792 final_eqn_idx = e-ebegin;
3796 int old_next_eqn = next_eqn;
3802 vector<int> add_eqnidxs;
3805 int num_do_optims = max(AO.Optimize.greedyminnum, (
int)optim.size()*AO.Optimize.greedymaxperc/100);
3809 for (
int i=0; i<(int)optim.size(); i++)
3810 best_improve = max(best_improve, optim[i].improve);
3811 if (best_improve <= 1)
3812 num_do_optims = MAXPOSITIVE;
3814 while (optim.size() > 0 && num_do_optims-- > 0) {
3819 for (
int i=0; i<(int)optim.size(); i++)
3820 if (optim[i].improve > best_improve) {
3822 best_improve=optim[i].improve;
3826 for (
int i=0; i<(int)add_eqnidxs.size(); i++)
3827 optim[best].eqnidxs.push_back(add_eqnidxs[i]);
3830 int next_idx = instr.size();
3833 add_eqnidxs.push_back(next_idx);
3836 optim.erase(optim.begin()+best);
3843 if (next_eqn == old_next_eqn)
break;
3847 instr.push_back(next_eqn);
3848 instr.insert(instr.end(), instr.begin()+final_eqn_idx+1, instr.begin()+final_eqn_idx+instr[final_eqn_idx+2]);
3851 instr[final_eqn_idx+3] = 0;
3854 WORD *t = &instr[0];
3856 ebegin = &*instr.begin();
3857 eend = ebegin+instr.size();
3860 for (WORD *e=ebegin; e!=eend; e+=de) {
3863 while (*(e+n) != 0) n+=*(e+n);
3865 memmove (t, e, n*
sizeof(WORD));
3870 instr.resize(t - &instr[0]);
3873 MesPrint (
"*** [%s, w=%w] DONE: optimize_greedy(numoper=%d) : numoper=%d",
3916 MesPrint (
"*** [%s, w=%w] CALL: recycle_variables", thetime_str().c_str());
3920 vector<const WORD *> instr;
3921 const WORD *tbegin = &*all_instr.begin();
3922 const WORD *tend = tbegin+all_instr.size();
3923 for (
const WORD *t=tbegin; t!=tend; t+=*(t+2))
3925 int n = instr.size();
3930 vector<int> vars_needed(n);
3931 vector<bool> vis(n,
false);
3932 vector<vector<int> > conn(n);
3937 while (!s.empty()) {
3938 int i=s.top(); s.pop();
3941 if (vis[i])
continue;
3946 for (
const WORD *t=instr[i]+3; *t!=0; t+=*t)
3947 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
3949 conn[i].push_back(k);
3957 vector<pair<int,int> > need;
3958 for (
int j=0; j<(int)conn[i].size(); j++)
3959 need.push_back(make_pair(vars_needed[conn[i][j]], conn[i][j]));
3962 if (*(instr[i]+1) != OPER_COMMA)
3963 sort(need.rbegin(), need.rend());
3966 for (
int j=0; j<(int)need.size(); j++) {
3967 vars_needed[i] = max(vars_needed[i], need[j].first+j);
3968 conn[i][j] = need[j].second;
3975 vector<int> order, first(n,0), last(n,0);
3976 vis = vector<bool>(n,
false);
3979 while (!s.empty()) {
3981 int i=s.top(); s.pop();
3985 if (vis[i])
continue;
3988 for (
int j=(
int)conn[i].size()-1; j>=0; j--)
3989 s.push(conn[i][j]+1);
3993 first[i] = last[i] = order.size();
3995 for (
int j=0; j<(int)conn[i].size(); j++) {
3997 last[k] = max(last[k], first[i]);
4006 vector<int> renum(n);
4008 for (
int i=0; i<(int)order.size(); i++) {
4009 for (
int j=0; j<(int)conn[order[i]].size(); j++) {
4010 int k = conn[order[i]][j];
4011 if (last[k] == i) var.insert(renum[k]);
4014 if (var.empty()) var.insert(numvar++);
4015 renum[order[i]] = *var.begin(); var.erase(var.begin());
4021 vector<WORD> newinstr;
4023 for (
int i=0; i<(int)order.size(); i++) {
4025 int j = newinstr.size();
4026 newinstr.insert(newinstr.end(), instr[x], instr[x]+*(instr[x]+2));
4028 newinstr[j] = renum[newinstr[j]];
4030 for (WORD *t=&newinstr[j+3]; *t!=0; t+=*t)
4031 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL)
4032 *(t+3) = renum[*(t+3)];
4036 MesPrint (
"*** [%s, w=%w] DONE: recycle_variables", thetime_str().c_str());
4063 MesPrint (
"*** [%s, w=%w] CALL: optimize_expression_given_Horner", thetime_str().c_str());
4070 LONG time_limit = 100 * AO.Optimize.greedytimelimit / (AO.Optimize.horner == O_MCTS ? AO.Optimize.mctsnumkeep : 1);
4071 if (time_limit == 0) time_limit=MAXPOSITIVE;
4074 LOCK(optimize_lock);
4075 vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4076 optimize_best_Horner_schemes.pop_back();
4077 UNLOCK(optimize_lock);
4089 if (AO.Optimize.method == O_CSE || AO.Optimize.method == O_CSEGREEDY)
4094 if (AO.Optimize.method == O_CSEGREEDY || AO.Optimize.method == O_GREEDY) {
4108 LOCK(optimize_lock);
4109 if (num_oper < optimize_best_num_oper) {
4110 optimize_num_vars = Horner_scheme.size();
4111 optimize_best_num_oper = num_oper;
4112 optimize_best_instr = instr;
4113 optimize_best_vars = vector<WORD>(AN.poly_vars, AN.poly_vars+AN.poly_num_vars);
4115 UNLOCK(optimize_lock);
4118 AN.poly_num_vars = 0;
4119 M_free(AN.poly_vars,
"poly_vars");
4122 MesPrint (
"*** [%s, w=%w] DONE: optimize_expression_given_Horner", thetime_str().c_str());
4133void PF_optimize_expression_given_Horner_master_init () {
4138int PF_optimize_expression_given_Horner_master_next() {
4148void PF_optimize_expression_given_Horner_master () {
4151 MesPrint (
"*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4155 vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4156 optimize_best_Horner_schemes.pop_back();
4159 int next = PF_optimize_expression_given_Horner_master_next();
4163 int len = Horner_scheme.size();
4169 MesPrint (
"*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4175void PF_optimize_expression_given_Horner_master_wait () {
4178 MesPrint (
"*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4182 for (
int i = 1; i < PF.numtasks; i++) {
4183 int next = PF_optimize_expression_given_Horner_master_next();
4192 optimize_best_num_oper = INT_MAX;
4193 for (
int i = 1; i < PF.numtasks; i++) {
4201 if (len >= optimize_best_num_oper)
continue;
4204 optimize_best_num_oper = len;
4206 optimize_best_instr.resize(len);
4209 optimize_best_vars.resize(len);
4212 optimize_num_vars = optimize_best_vars.size();
4216 MesPrint (
"*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4222void PF_optimize_expression_given_Horner_slave () {
4225 MesPrint (
"*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4228 optimize_best_Horner_schemes.clear();
4229 optimize_best_num_oper = INT_MAX;
4247 if (len == 0)
break;
4250 optimize_best_Horner_schemes.push_back(vector<WORD>());
4251 vector<WORD> &Horner_scheme = optimize_best_Horner_schemes.back();
4252 Horner_scheme.resize(len);
4260 if (optimize_best_num_oper != INT_MAX) {
4261 len = optimize_best_instr.size();
4264 len = optimize_best_vars.size();
4271 MesPrint (
"*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4291void generate_output (
const vector<WORD> &instr,
int exprnr,
int extraoffset,
const vector<vector<WORD> > &brackets) {
4294 MesPrint (
"*** [%s, w=%w] CALL: generate_output", thetime_str().c_str());
4298 vector<WORD> output;
4303 int maxsize = (int)instr.size();
4304 for (
int i=0; i<maxsize; i+=instr[i+2]) num++;
4305 int *tstart = (
int *)Malloc1(num*
sizeof(
int),
"nplaces");
4307 for (
int i=0; i<maxsize; i+=instr[i+2]) tstart[num++] = i;
4308 for (
int j=0; j<num; j++) {
4312 WCOPY(AT.WorkPointer, &instr[i+3], (instr[i+2]-3));
4315 AO.OptimizeResult.maxvar = MaX(AO.OptimizeResult.maxvar, instr[i]+extraoffset);
4318 for (WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4319 if (*t == ABS(*(t+*t-1))+1)
continue;
4320 if (*(t+1)==SYMBOL) *(t+3) = optimize_best_vars[*(t+3)];
4321 if (*(t+1)==EXTRASYMBOL) {
4323 *(t+3) = MAXVARIABLES-*(t+3)-extraoffset;
4330 if (instr[i+1]==OPER_MUL) {
4332 WORD *now=AT.WorkPointer+1;
4337 for (WORD *t=AT.WorkPointer; *t!=0; t+=dt) {
4339 if (*t == ABS(*(t+*t-1))+1) {
4341 memmove(AT.WorkPointer+instr[i+2], t, *t*
sizeof(WORD));
4347 memmove(now, t+1, n*
sizeof(WORD));
4349 coeff_sign *= SGN(*(t+dt-1));
4355 int n = *(AT.WorkPointer + instr[i+2]) - 1;
4356 memmove(now, AT.WorkPointer+instr[i+2]+1, n*
sizeof(WORD));
4366 *(now-1) *= coeff_sign;
4368 *AT.WorkPointer = now - AT.WorkPointer;
4374 if (instr[i+1]==OPER_COMMA) {
4375 WORD *start = AT.WorkPointer + instr[i+2];
4378 for (
const WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4379 if ( ( brackets[b].size() != 0 ) && ( brackets[b][0] == 0 ) )
break;
4380 *now++ = *t + brackets[b].size();
4381 if (brackets[b].size() != 0) {
4382 memcpy(now, &brackets[b][0], brackets[b].size()*
sizeof(WORD));
4383 now += brackets[b].size();
4385 memcpy(now, t+1, (*t-1)*
sizeof(WORD));
4390 memmove(AT.WorkPointer, start, (now-start)*
sizeof(WORD));
4395 if (i+instr[i+2]<(
int)instr.size())
4396 output.push_back(instr[i]+extraoffset);
4398 output.push_back(-(exprnr+1));
4403 while (*(AT.WorkPointer+n)!=0)
4404 n += *(AT.WorkPointer+n);
4406 output.insert(output.end(), AT.WorkPointer, AT.WorkPointer+n);
4410 output.push_back(0);
4413 if (AO.OptimizeResult.code != NULL)
4414 M_free(AO.OptimizeResult.code,
"optimize output");
4416 M_free(tstart,
"nplaces");
4419 AO.OptimizeResult.codesize = output.size();
4420 AO.OptimizeResult.code = (WORD *)Malloc1(output.size()*
sizeof(WORD),
"optimize output");
4421 memcpy(AO.OptimizeResult.code, &output[0], output.size()*
sizeof(WORD));
4424 MesPrint (
"*** [%s, w=%w] DONE: generate_output", thetime_str().c_str());
4443 MesPrint (
"*** [%s, w=%w] CALL: generate_expression", thetime_str().c_str());
4448 WORD *oldWorkPointer = AT.WorkPointer;
4450 CBUF *C = cbuf+AC.cbufnum;
4451 WORD *term = AT.WorkPointer, oldcurexpr = AR.CurExpr;
4454 SetScratch(AR.infile,&(e->onfile));
4456 if ( GetTerm(BHEAD term) <= 0 ) {
4457 MesPrint(
"Expression %d has problems in scratchfile",exprnr);
4460 SeekScratch(AR.outfile,&position);
4461 e->onfile = position;
4462 if (
PutOut(BHEAD term,&position,AR.outfile,0) < 0 ) {
4463 MesPrint(
"Expression %d has problems in output scratchfile",exprnr);
4467 AR.CurExpr = exprnr;
4472 WORD *t = AO.OptimizeResult.code;
4474 WORD old = AR.Cnumlhs; AR.Cnumlhs = 0;
4478 WORD* currentWorkPointer = AT.WorkPointer;
4480 bool is_expr = *t < 0;
4484 memcpy(currentWorkPointer, t, *t*
sizeof(WORD));
4485 Generator(BHEAD currentWorkPointer, C->numlhs);
4495 if (
EndSort(BHEAD NULL,0) < 0) {
4500 AT.WorkPointer = oldWorkPointer;
4501 AR.CurExpr = oldcurexpr;
4504 MesPrint (
"*** [%s, w=%w] DONE: generate_expression", thetime_str().c_str());
4527 MesPrint (
"*** [%s, w=%w] CALL: optimize_print_code", thetime_str().c_str());
4529 if ( ( AO.Optimize.debugflags & 1 ) != 0 ) {
4539 WORD *t = AO.OptimizeResult.code;
4543 t++;
while (*t!=0) t+=*t; t++;
4545 WORD **tstart = (WORD **)Malloc1(num*
sizeof(WORD *),
"print objects");
4546 t = AO.OptimizeResult.code; num = 0;
4549 t++;
while (*t!=0) t+=*t; t++;
4552 int halfnum = num/2;
4553 for (
int i=0; i<halfnum; i++) { swap(tstart[i],tstart[num-1-i]); }
4554 for (
int i = 0; i < num; i++ ) {
4557 PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4558 else if (print_expr)
4559 PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4561 CBUF *C = cbuf + AM.sbufnum;
4562 if (C->numrhs >= AO.OptimizeResult.minvar)
4563 PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4567 CBUF *C = cbuf + AM.sbufnum;
4568 if (C->numrhs >= AO.OptimizeResult.minvar)
4569 PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4571 WORD *t = AO.OptimizeResult.code;
4574 PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4576 else if (print_expr)
4577 PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4579 while (*t!=0) t+=*t;
4585 MesPrint (
"*** [%s, w=%w] DONE: optimize_print_code", thetime_str().c_str());
4639#if defined(WITHMPI) && (defined(DEBUG) || defined(DEBUG_MORE) || defined(DEBUG_MCTS) || defined(DEBUG_GREEDY))
4641 struct save_printflag {
4643 oldprintflag = AS.printflag;
4647 AS.printflag = oldprintflag;
4654 MesPrint (
"*** [%s, w=%w] CALL: Optimize", thetime_str().c_str());
4655 MesPrint (
"*** %"); PrintRunningTime();
4659 optimize_lock = dummylock;
4662 AO.OptimizeResult.minvar = (cbuf + AM.sbufnum)->numrhs + 1;
4669 if (PF.me == MASTER)
4671 MesPrint (
"*** runtime after preparing the expression: %"); PrintRunningTime();
4674 if (optimize_expr[0]==0 ||
4675 (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==ABS(optimize_expr[optimize_expr[0]-1])+1) ||
4676 (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==8 &&
4677 optimize_expr[5]==1 && optimize_expr[6]==1 && ABS(optimize_expr[7])==3)) {
4678 if (AO.OptimizeResult.code != NULL)
4679 M_free(AO.OptimizeResult.code,
"optimize output");
4684 AO.OptimizeResult.code = (WORD *)Malloc1((optimize_expr[0]+3)*
sizeof(WORD),
"optimize output");
4685 AO.OptimizeResult.code[0] = -(exprnr+1);
4686 memcpy(AO.OptimizeResult.code+1, optimize_expr, (optimize_expr[0]+1)*
sizeof(WORD));
4687 AO.OptimizeResult.code[optimize_expr[0]+2] = 0;
4691 optimize_best_Horner_schemes.clear();
4692 if (AO.Optimize.horner == O_OCCURRENCE) {
4693 if (AO.Optimize.hornerdirection != O_BACKWARD)
4694 optimize_best_Horner_schemes.push_back(
occurrence_order(optimize_expr,
false));
4695 if (AO.Optimize.hornerdirection != O_FORWARD)
4696 optimize_best_Horner_schemes.push_back(
occurrence_order(optimize_expr,
true));
4699 if (AO.Optimize.horner == O_SIMULATED_ANNEALING) {
4700 optimize_best_Horner_schemes.push_back(simulated_annealing());
4703 mcts_best_schemes.clear();
4704 if ( AO.inscheme ) {
4705 optimize_best_Horner_schemes.push_back(vector<WORD>(AO.schemenum));
4706 for (
int i=0; i < AO.schemenum; i++ )
4707 optimize_best_Horner_schemes[0][i] = AO.inscheme[i];
4710 for (
int i = 0; i < AO.Optimize.mctsnumrepeat; i++ )
4713 for (set<pair<
int,vector<WORD> > >::iterator i=mcts_best_schemes.begin(); i!=mcts_best_schemes.end(); i++) {
4714 optimize_best_Horner_schemes.push_back(i->second);
4716 MesPrint (
"{%a} -> %d",i->second.size(), &i->second[0], i->first);
4726 if (PF.me == MASTER)
4728 MesPrint (
"*** runtime after Horner: %"); PrintRunningTime();
4732 if (PF.me == MASTER ) {
4733 PF_optimize_expression_given_Horner_master_init();
4737 optimize_best_num_oper = INT_MAX;
4739 int imax = (int)optimize_best_Horner_schemes.size();
4741 for (
int i=0; i<imax; i++) {
4742#if defined(WITHPTHREADS)
4743 if (AM.totalnumberofthreads > 1)
4744 optimize_expression_given_Horner_threaded();
4746#elif defined(WITHMPI)
4747 if (PF.numtasks > 1)
4748 PF_optimize_expression_given_Horner_master();
4755 PF_optimize_expression_given_Horner_master_wait();
4758 if (PF.numtasks > 1)
4759 PF_optimize_expression_given_Horner_slave();
4769 if (PF.me == MASTER)
4771 generate_output(optimize_best_instr, exprnr, cbuf[AM.sbufnum].numrhs, brackets);
4775 if (AO.OptimizeResult.code == NULL) {
4776 AO.OptimizeResult.code = (WORD *)Malloc1(
sizeof(WORD),
"optimize output");
4783 if (PF.me == MASTER) {
4785 PF_Pack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4786 PF_Pack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4789 if (PF.me != MASTER) {
4790 PF_Unpack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4791 PF_Unpack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4797 snprintf (str,100,
"%d",AO.OptimizeResult.minvar);
4798 PutPreVar((UBYTE *)
"optimminvar_",(UBYTE *)str,0,1);
4799 snprintf (str,100,
"%d",AO.OptimizeResult.maxvar);
4800 PutPreVar((UBYTE *)
"optimmaxvar_",(UBYTE *)str,0,1);
4804 if (PF.me == MASTER)
4811 if (PF.me == MASTER)
4817 if (PF.me == MASTER) {
4820 if ( AO.Optimize.printstats > 0 ) {
4825 snprintf(str,20,
"%d",numop);
4826 PutPreVar((UBYTE *)
"optimvalue_",(UBYTE *)str,0,1);
4831 snprintf(str,20,
"%d",numop);
4832 PutPreVar((UBYTE *)
"optimvalue_",(UBYTE *)str,0,1);
4835 if ( ( AO.Optimize.schemeflags&1 ) == 1 ) {
4837 UBYTE *OutScr, *Out, *old1 = AO.OutputLine, *old2 = AO.OutFill;
4839 AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4841 OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4842 TokenToLine((UBYTE *)
" Scheme selected: ");
4843 for ( i = 0; i < optimize_num_vars; i++ ) {
4845 sym = optimize_best_vars[i];
4846 if ( i > 0 ) TokenToLine((UBYTE *)
",");
4847 if ( sym < NumSymbols ) {
4848 StrCopy(FindSymbol(sym),OutScr);
4852 Out = StrCopy((UBYTE *)AC.extrasym,Out);
4853 if ( AC.extrasymbols == 0 ) {
4854 Out = NumCopy((MAXVARIABLES-sym),Out);
4855 Out = StrCopy((UBYTE *)
"_",Out);
4857 else if ( AC.extrasymbols == 1 ) {
4858 Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4861 TokenToLine(OutScr);
4863 TokenToLine((UBYTE *)
";");
4866 AO.OutputLine = old1;
4871 UBYTE *OutScr, *Out, *outstring = 0;
4873 AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4874 OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4875 for ( i = 0; i < optimize_num_vars; i++ ) {
4877 sym = optimize_best_vars[i];
4878 if ( sym < NumSymbols ) {
4879 StrCopy(FindSymbol(sym),OutScr);
4883 Out = StrCopy((UBYTE *)AC.extrasym,Out);
4884 if ( AC.extrasymbols == 0 ) {
4885 Out = NumCopy((MAXVARIABLES-sym),Out);
4886 Out = StrCopy((UBYTE *)
"_",Out);
4888 else if ( AC.extrasymbols == 1 ) {
4889 Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4892 outstring = AddToString(outstring,OutScr,1);
4894 if ( outstring == 0 ) {
4895 PutPreVar((UBYTE *)
"optimscheme_",(UBYTE *)
"",0,1);
4898 PutPreVar((UBYTE *)
"optimscheme_",(UBYTE *)outstring,0,1);
4899 M_free(outstring,
"AddToString");
4907 if ( PF.me == MASTER ) {
4913 value = GetPreVar((UBYTE *)
"optimvalue_", WITHERROR);
4914 bytes = strlen((
char *)value);
4915 PF_LongMultiPack(&bytes, 1, PF_INT);
4916 PF_LongMultiPack(value, bytes, PF_BYTE);
4918 value = GetPreVar((UBYTE *)
"optimscheme_", WITHERROR);
4919 bytes = strlen((
char *)value);
4920 PF_LongMultiPack(&bytes, 1, PF_INT);
4921 PF_LongMultiPack(value, bytes, PF_BYTE);
4924 if ( PF.me != MASTER ) {
4925 static vector<UBYTE> prevarbuf;
4929 PF_LongMultiUnpack(&bytes, 1, PF_INT);
4930 prevarbuf.reserve(bytes + 1);
4931 value = &prevarbuf[0];
4932 PF_LongMultiUnpack(value, bytes, PF_BYTE);
4933 value[bytes] =
'\0';
4934 PutPreVar((UBYTE *)
"optimvalue_", value, NULL, 1);
4936 PF_LongMultiUnpack(&bytes, 1, PF_INT);
4937 prevarbuf.reserve(bytes + 1);
4938 value = &prevarbuf[0];
4939 PF_LongMultiUnpack(value, bytes, PF_BYTE);
4940 value[bytes] =
'\0';
4941 PutPreVar((UBYTE *)
"optimscheme_", value, NULL, 1);
4946 M_free(optimize_expr,
"LoadOptim");
4949 MesPrint (
"*** [%s, w=%w] DONE: Optimize", thetime_str().c_str());
4979 if ( AO.OptimizeResult.code != NULL ) {
4980 M_free(AO.OptimizeResult.code,
"optimize output");
4981 AO.OptimizeResult.code = NULL;
4982 AO.OptimizeResult.codesize = 0;
4983 PutPreVar((UBYTE *)
"optimminvar_",(UBYTE *)(
"0"),0,1);
4984 PutPreVar((UBYTE *)
"optimmaxvar_",(UBYTE *)(
"0"),0,1);
4985 PruneExtraSymbols(AO.OptimizeResult.minvar-1);
4986 cbuf[AM.sbufnum].numrhs = AO.OptimizeResult.minvar-1;
4987 AO.OptimizeResult.minvar = AO.OptimizeResult.maxvar = 0;
4989 if ( AO.OptimizeResult.nameofexpr != NULL ) {
4996 if ( GetName(AC.exprnames,AO.OptimizeResult.nameofexpr,&numexpr,NOAUTO) != CEXPRESSION ) {
4997 MesPrint(
"@Internal error while clearing optimized expression %s ",AO.OptimizeResult.nameofexpr);
5000 M_free(AO.OptimizeResult.nameofexpr,
"optimize expression name");
5001 AO.OptimizeResult.nameofexpr = NULL;
5002 w = &(Expressions[numexpr].status);
5003 *w = SetExprCases(DROP,1,*w);
5004 if ( *w < 0 ) error = 1;
5006 snprintf (str,20,
"%d",cbuf[AM.sbufnum].numrhs);
5007 PutPreVar(AM.oldnumextrasymbols,(UBYTE *)str,0,1);
5008 PutPreVar((UBYTE *)
"optimvalue_",(UBYTE *)(
"0"),0,1);
5009 PutPreVar((UBYTE *)
"optimscheme_",(UBYTE *)(
"0"),0,1);
WORD PutOut(PHEAD WORD *, POSITION *, FILEHANDLE *, WORD)
LONG EndSort(PHEAD WORD *, int)
int Generator(PHEAD WORD *, WORD)
void LowerSortLevel(void)
int StoreTerm(PHEAD WORD *)
int PutPreVar(UBYTE *, UBYTE *, UBYTE *, int)
int PF_LongSingleReceive(int src, int tag, int *psrc, int *ptag)
int PF_LongSingleSend(int to, int tag)
int PF_PrepareLongSinglePack(void)
int PF_Unpack(void *buffer, size_t count, MPI_Datatype type)
int PF_Receive(int src, int tag, int *psrc, int *ptag)
int PF_Send(int to, int tag)
int PF_LongSingleUnpack(void *buffer, size_t count, MPI_Datatype type)
int PF_Pack(const void *buffer, size_t count, MPI_Datatype type)
int PF_PrepareLongMultiPack(void)
int PF_LongMultiBroadcast(void)
int PF_LongSinglePack(const void *buffer, size_t count, MPI_Datatype type)
vector< WORD > recycle_variables(const vector< WORD > &all_instr)
void optimize_print_code(int print_expr)
int count_operators(const WORD *expr, bool print=false)
void generate_output(const vector< WORD > &instr, int exprnr, int extraoffset, const vector< vector< WORD > > &brackets)
vector< vector< WORD > > get_brackets()
vector< WORD > Horner_tree(const WORD *expr, const vector< WORD > &order)
vector< WORD > occurrence_order(const WORD *expr, bool rev)
vector< optimization > find_optimizations(const vector< WORD > &instr)
vector< WORD > merge_operators(const vector< WORD > &all_instr, bool move_coeff)
void optimize_expression_given_Horner()
vector< WORD > optimize_greedy(vector< WORD > instr, LONG time_limit)
WORD generate_expression(WORD exprnr)
int count_operators_cse(const vector< WORD > &tree)
LONG get_expression(int exprnr)
bool do_optimization(const optimization optim, vector< WORD > &instr, int newid)
int partial_factorize(vector< WORD > &instr, int n, int improve)
void build_Horner_tree(const WORD **terms, int numterms, int var, int maxvar, int pos, vector< WORD > *res)
void fixarg(UWORD *t, WORD &n)
bool term_compare(const WORD *a, const WORD *b)
void my_random_shuffle(PHEAD RandomAccessIterator fr, RandomAccessIterator to)
int Optimize(WORD exprnr, int do_print)
WORD getpower(const WORD *term, int var, int pos)
vector< WORD > generate_instructions(const vector< WORD > &tree, bool do_CSE)
void PF_BroadcastBuffer(WORD **buffer, LONG *length)