FORM v5.0.0-35-g6318119
threads.c
Go to the documentation of this file.
1
22/* #[ License : */
23/*
24 * Copyright (C) 1984-2026 J.A.M. Vermaseren
25 * When using this file you are requested to refer to the publication
26 * J.A.M.Vermaseren "New features of FORM" math-ph/0010025
27 * This is considered a matter of courtesy as the development was paid
28 * for by FOM the Dutch physics granting agency and we would like to
29 * be able to track its scientific use to convince FOM of its value
30 * for the community.
31 *
32 * This file is part of FORM.
33 *
34 * FORM is free software: you can redistribute it and/or modify it under the
35 * terms of the GNU General Public License as published by the Free Software
36 * Foundation, either version 3 of the License, or (at your option) any later
37 * version.
38 *
39 * FORM is distributed in the hope that it will be useful, but WITHOUT ANY
40 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
41 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
42 * details.
43 *
44 * You should have received a copy of the GNU General Public License along
45 * with FORM. If not, see <http://www.gnu.org/licenses/>.
46 */
47/* #] License : */
48
49#ifdef WITHPTHREADS
50
51#define WHOLEBRACKETS
52/*
53 #[ Variables :
54
55 The sortbot additions are from 17-may-2007 and after. They constitute
56 an attempt to make the final merge sorting faster for the master.
57 This way the master has only one compare per term.
58 It does add some complexity, but the final merge routine (MasterMerge)
59 is much simpler for the sortbots. On the other hand the original merging is
60 for a large part a copy of the MergePatches routine in sort.c and hence
61 even though complex the bad part has been thoroughly debugged.
62*/
63
64#include "form3.h"
65
66#ifdef WITH_ALARM
67// This is only required if we are blocking SIG_ALRM in the worker threads.
68#include <signal.h>
69#endif
70
71#ifdef WITHFLOAT
72#include <gmp.h>
73
74int PackFloat(WORD *,mpf_t);
75int UnpackFloat(mpf_t, WORD *);
76void RatToFloat(mpf_t result, UWORD *formrat, int ratsize);
77#endif
78
79static int numberofthreads;
80static int numberofworkers;
81static int identityofthreads = 0;
82static int *listofavailables;
83static int topofavailables = 0;
84static pthread_key_t identitykey;
85static INILOCK(numberofthreadslock)
86static INILOCK(availabilitylock)
87static pthread_t *threadpointers = 0;
88static pthread_mutex_t *wakeuplocks;
89static pthread_mutex_t *wakeupmasterthreadlocks;
90static pthread_cond_t *wakeupconditions;
91static pthread_condattr_t *wakeupconditionattributes;
92static int *wakeup;
93static int *wakeupmasterthread;
94static INILOCK(wakeupmasterlock)
95static pthread_cond_t wakeupmasterconditions = PTHREAD_COND_INITIALIZER;
96static pthread_cond_t *wakeupmasterthreadconditions;
97static int wakeupmaster = 0;
98static int identityretval;
99/* static INILOCK(clearclocklock) */
100static LONG *timerinfo;
101static LONG *sumtimerinfo;
102static int numberclaimed;
103
104static THREADBUCKET **threadbuckets, **freebuckets;
105static int numthreadbuckets;
106static int numberoffullbuckets;
107
108/* static int numberbusy = 0; */
109
110INILOCK(dummylock)
111INIRWLOCK(dummyrwlock)
112static pthread_cond_t dummywakeupcondition = PTHREAD_COND_INITIALIZER;
113
114#ifdef WITHSORTBOTS
115static POSITION SortBotPosition;
116static int numberofsortbots;
117static INILOCK(wakeupsortbotlock)
118static pthread_cond_t wakeupsortbotconditions = PTHREAD_COND_INITIALIZER;
119static int topsortbotavailables = 0;
120static LONG numberofterms;
121#endif
122
123/*
124 #] Variables :
125 #[ Identity :
126 #[ StartIdentity :
127*/
133void StartIdentity(void)
134{
135 pthread_key_create(&identitykey,FinishIdentity);
136}
137
138/*
139 #] StartIdentity :
140 #[ FinishIdentity :
141*/
146void FinishIdentity(void *keyp)
147{
148 DUMMYUSE(keyp);
149/* free(keyp); */
150}
151
152/*
153 #] FinishIdentity :
154 #[ SetIdentity :
155*/
160int SetIdentity(int *identityretval)
161{
162/*
163#ifdef _MSC_VER
164 printf("addr %d\n",&numberofthreadslock);
165 printf("size %d\n",sizeof(numberofthreadslock));
166#endif
167*/
168 LOCK(numberofthreadslock);
169 *identityretval = identityofthreads++;
170 UNLOCK(numberofthreadslock);
171 pthread_setspecific(identitykey,(void *)identityretval);
172 return(*identityretval);
173}
174
175/*
176 #] SetIdentity :
177 #[ WhoAmI :
178*/
179
190int WhoAmI(void)
191{
192 int *identity;
193/*
194 First a fast exit for when there is at most one thread
195*/
196 if ( identityofthreads <= 1 ) return(0);
197/*
198 Now the reading of the key.
199 According to the book the statement should read:
200
201 pthread_getspecific(identitykey,(void **)(&identity));
202
203 but according to the information in pthread.h it is:
204*/
205 identity = (int *)pthread_getspecific(identitykey);
206 return(*identity);
207}
208
209/*
210 #] WhoAmI :
211 #[ BeginIdentities :
212*/
218void BeginIdentities(void)
219{
220 StartIdentity();
221 SetIdentity(&identityretval);
222}
223
224/*
225 #] BeginIdentities :
226 #] Identity :
227 #[ StartHandleLock :
228*/
235void StartHandleLock(void)
236{
237 AM.handlelock = dummyrwlock;
238}
239
240/*
241 #] StartHandleLock :
242 #[ StartAllThreads :
243*/
261int StartAllThreads(int number)
262{
263 int identity, j, dummy, mul;
264 ALLPRIVATES *B;
265 pthread_t thethread;
266 identity = WhoAmI();
267
268#ifdef WITHSORTBOTS
269 timerinfo = (LONG *)Malloc1(sizeof(LONG)*number*2,"timerinfo");
270 sumtimerinfo = (LONG *)Malloc1(sizeof(LONG)*number*2,"sumtimerinfo");
271 for ( j = 0; j < number*2; j++ ) { timerinfo[j] = 0; sumtimerinfo[j] = 0; }
272 mul = 2;
273#else
274 timerinfo = (LONG *)Malloc1(sizeof(LONG)*number,"timerinfo");
275 sumtimerinfo = (LONG *)Malloc1(sizeof(LONG)*number,"sumtimerinfo");
276 for ( j = 0; j < number; j++ ) { timerinfo[j] = 0; sumtimerinfo[j] = 0; }
277 mul = 1;
278#endif
279
280 listofavailables = (int *)Malloc1(sizeof(int)*(number+1),"listofavailables");
281 threadpointers = (pthread_t *)Malloc1(sizeof(pthread_t)*number*mul,"threadpointers");
282 AB = (ALLPRIVATES **)Malloc1(sizeof(ALLPRIVATES *)*number*mul,"Private structs");
283
284 wakeup = (int *)Malloc1(sizeof(int)*number*mul,"wakeup");
285 wakeuplocks = (pthread_mutex_t *)Malloc1(sizeof(pthread_mutex_t)*number*mul,"wakeuplocks");
286 wakeupconditions = (pthread_cond_t *)Malloc1(sizeof(pthread_cond_t)*number*mul,"wakeupconditions");
287 wakeupconditionattributes = (pthread_condattr_t *)
288 Malloc1(sizeof(pthread_condattr_t)*number*mul,"wakeupconditionattributes");
289
290 wakeupmasterthread = (int *)Malloc1(sizeof(int)*number*mul,"wakeupmasterthread");
291 wakeupmasterthreadlocks = (pthread_mutex_t *)Malloc1(sizeof(pthread_mutex_t)*number*mul,"wakeupmasterthreadlocks");
292 wakeupmasterthreadconditions = (pthread_cond_t *)Malloc1(sizeof(pthread_cond_t)*number*mul,"wakeupmasterthread");
293
294 numberofthreads = number;
295 numberofworkers = number - 1;
296 threadpointers[identity] = pthread_self();
297 topofavailables = 0;
298
299#ifdef WITH_ALARM
300 /* During thread creation, we block SIGALRM on the main thread. The created
301 threads will inherit this. This is required for #timeout to work properly
302 in TFORM: only the main thread should recieve SIGALRM. */
303 sigset_t sig_set;
304 sigemptyset(&sig_set);
305 sigaddset(&sig_set, SIGALRM);
306 pthread_sigmask(SIG_BLOCK, &sig_set, NULL);
307#endif
308
309 for ( j = 1; j < number; j++ ) {
310 if ( pthread_create(&thethread,NULL,RunThread,(void *)(&dummy)) )
311 goto failure;
312 }
313/*
314 Now we initialize the master at the same time that the workers are doing so.
315*/
316 B = InitializeOneThread(identity);
317 AR.infile = &(AR.Fscr[0]);
318 AR.outfile = &(AR.Fscr[1]);
319 AR.hidefile = &(AR.Fscr[2]);
320 AM.sbuflock = dummylock;
321 AS.inputslock = dummylock;
322 AS.outputslock = dummylock;
323 AS.MaxExprSizeLock = dummylock;
324 AP.PreVarLock = dummylock;
325 AC.halfmodlock = dummylock;
326 MakeThreadBuckets(number,0);
327/*
328 Now we wait for the workers to finish their startup.
329 We don't want to initialize the sortbots yet and run the risk that
330 some of them may end up with a lower number than one of the workers.
331*/
332 MasterWaitAll();
333#ifdef WITHSORTBOTS
334 if ( numberofworkers > 2 ) {
335 numberofsortbots = numberofworkers-2;
336 for ( j = numberofworkers+1; j < 2*numberofworkers-1; j++ ) {
337 if ( pthread_create(&thethread,NULL,RunSortBot,(void *)(&dummy)) )
338 goto failure;
339 }
340 }
341 else {
342 numberofsortbots = 0;
343 }
344 MasterWaitAllSortBots();
345 DefineSortBotTree();
346#endif
347 IniSortBlocks(number-1);
348 AS.MasterSort = 0;
349 AM.storefilelock = dummylock;
350
351#ifdef WITH_ALARM
352 /* Now we allow the main thread to recieve SIGALRM again. */
353 pthread_sigmask(SIG_UNBLOCK, &sig_set, NULL);
354#endif
355
356/*
357MesPrint("AB = %x %x %x %d",AB[0],AB[1],AB[2], identityofthreads);
358*/
359 return(0);
360failure:
361 MesPrint("Cannot start %d threads",number);
362 Terminate(-1);
363 return(-1);
364}
365
366/*
367 #] StartAllThreads :
368 #[ InitializeOneThread :
369*/
373UBYTE *scratchname[] = { (UBYTE *)"scratchsize",
374 (UBYTE *)"scratchsize",
375 (UBYTE *)"hidesize" };
402ALLPRIVATES *InitializeOneThread(int identity)
403{
404 WORD *t, *ScratchBuf;
405 int i, j, bsize, *bp;
406 LONG ScratchSize[3], IOsize;
407 ALLPRIVATES *B;
408 UBYTE *s;
409
410 wakeup[identity] = 0;
411 wakeuplocks[identity] = dummylock;
412 pthread_condattr_init(&(wakeupconditionattributes[identity]));
413 pthread_condattr_setpshared(&(wakeupconditionattributes[identity]),PTHREAD_PROCESS_PRIVATE);
414 wakeupconditions[identity] = dummywakeupcondition;
415 pthread_cond_init(&(wakeupconditions[identity]),&(wakeupconditionattributes[identity]));
416 wakeupmasterthread[identity] = 0;
417 wakeupmasterthreadlocks[identity] = dummylock;
418 wakeupmasterthreadconditions[identity] = dummywakeupcondition;
419
420 bsize = sizeof(ALLPRIVATES);
421 bsize = (bsize+sizeof(int)-1)/sizeof(int);
422 B = (ALLPRIVATES *)Malloc1(sizeof(int)*bsize,"B struct");
423 for ( bp = (int *)B, j = 0; j < bsize; j++ ) *bp++ = 0;
424
425 AB[identity] = B;
426/*
427 12-jun-2007 JV:
428 For the timing one has to know a few things:
429 The POSIX standard is that there is only a single process ID and that
430 getrusage returns the time of all the threads together.
431 Under Linux there are two methods though: The older LinuxThreads and NPTL.
432 LinuxThreads gives each thread its own process id. This makes that we
433 can time the threads with getrusage, and hence this was done. Under NPTL
434 this has been 'corrected' and suddenly getruage doesn't work anymore the
435 way it used to. Now we need
436 clock_gettime(CLOCK_THREAD_CPUTIME_ID,&timing)
437 which is declared in <time.h> and we need -lrt extra in the link statement.
438 (this is at least the case on blade02 at DESY-Zeuthen).
439 See also the code in tools.c at the routine Timer.
440 We may still have to include more stuff there.
441*/
442 if ( identity > 0 ) TimeCPU(0);
443
444#ifdef WITHSORTBOTS
445
446 if ( identity > numberofworkers ) {
447/*
448 Some workspace is needed when we have a PolyFun and we have to add
449 two terms and the new result is going to be longer than the old result.
450*/
451 LONG length = AM.WorkSize*sizeof(WORD)/8+AM.MaxTer*2;
452 AT.WorkSpace = (WORD *)Malloc1(length,"WorkSpace");
453 AT.WorkTop = AT.WorkSpace + length/sizeof(WORD);
454 AT.WorkPointer = AT.WorkSpace;
455 AT.identity = identity;
456/*
457 The SB struct gets treated in IniSortBlocks.
458 The SortBotIn variables will be defined DefineSortBotTree.
459*/
460 if ( AN.SoScratC == 0 ) {
461 AN.SoScratC = (UWORD *)Malloc1(2*(AM.MaxTal+2)*sizeof(UWORD),"Scratch in SortBot");
462 }
463 AT.SS = (SORTING *)Malloc1(sizeof(SORTING),"dummy sort buffer");
464 AT.SS->PolyFlag = 0;
465
466 AT.comsym[0] = 8;
467 AT.comsym[1] = SYMBOL;
468 AT.comsym[2] = 4;
469 AT.comsym[3] = 0;
470 AT.comsym[4] = 1;
471 AT.comsym[5] = 1;
472 AT.comsym[6] = 1;
473 AT.comsym[7] = 3;
474 AT.comnum[0] = 4;
475 AT.comnum[1] = 1;
476 AT.comnum[2] = 1;
477 AT.comnum[3] = 3;
478 AT.comfun[0] = FUNHEAD+4;
479 AT.comfun[1] = FUNCTION;
480 AT.comfun[2] = FUNHEAD;
481 AT.comfun[3] = 0;
482#if FUNHEAD > 3
483 for ( i = 4; i <= FUNHEAD; i++ )
484 AT.comfun[i] = 0;
485#endif
486 AT.comfun[FUNHEAD+1] = 1;
487 AT.comfun[FUNHEAD+2] = 1;
488 AT.comfun[FUNHEAD+3] = 3;
489 AT.comind[0] = 7;
490 AT.comind[1] = INDEX;
491 AT.comind[2] = 3;
492 AT.comind[3] = 0;
493 AT.comind[4] = 1;
494 AT.comind[5] = 1;
495 AT.comind[6] = 3;
496
497 AT.inprimelist = -1;
498 AT.sizeprimelist = 0;
499 AT.primelist = 0;
500 AT.bracketinfo = 0;
501
502 AR.CompareRoutine = (COMPAREDUMMY)(&Compare1);
503
504 AR.sLevel = 0;
505 AR.wranfia = 0;
506 AR.wranfcall = 0;
507 AR.wranfnpair1 = NPAIR1;
508 AR.wranfnpair2 = NPAIR2;
509 AN.NumFunSorts = 5;
510 AN.MaxFunSorts = 5;
511 AN.SplitScratch = 0;
512 AN.SplitScratchSize = AN.InScratch = 0;
513 AN.SplitScratch1 = 0;
514 AN.SplitScratchSize1 = AN.InScratch1 = 0;
515
516 AN.FunSorts = (SORTING **)Malloc1((AN.NumFunSorts+1)*sizeof(SORTING *),"FunSort pointers");
517 for ( i = 0; i <= AN.NumFunSorts; i++ ) AN.FunSorts[i] = 0;
518 AN.FunSorts[0] = AT.S0 = AT.SS;
519 AN.idfunctionflag = 0;
520 AN.tryterm = 0;
521
522 return(B);
523 }
524 if ( identity == 0 && AN.SoScratC == 0 ) {
525 AN.SoScratC = (UWORD *)Malloc1(2*(AM.MaxTal+2)*sizeof(UWORD),"Scratch in SortBot");
526 }
527#endif
528 AR.CurDum = AM.IndDum;
529 for ( j = 0; j < 3; j++ ) {
530 if ( identity == 0 ) {
531 if ( j == 2 ) {
532 ScratchSize[j] = AM.HideSize;
533 }
534 else {
535 ScratchSize[j] = AM.ScratSize;
536 }
537 if ( ScratchSize[j] < 10*AM.MaxTer ) ScratchSize[j] = 10 * AM.MaxTer;
538 }
539 else {
540/*
541 ScratchSize[j] = AM.ScratSize / (numberofthreads-1);
542 ScratchSize[j] = ScratchSize[j] / 20;
543 if ( ScratchSize[j] < 10*AM.MaxTer ) ScratchSize[j] = 10 * AM.MaxTer;
544*/
545 if ( j == 1 ) ScratchSize[j] = AM.ThreadScratOutSize;
546 else ScratchSize[j] = AM.ThreadScratSize;
547 if ( ScratchSize[j] < 4*AM.MaxTer ) ScratchSize[j] = 4 * AM.MaxTer;
548 AR.Fscr[j].name = 0;
549 }
550 ScratchSize[j] = ( ScratchSize[j] + 255 ) / 256;
551 ScratchSize[j] = ScratchSize[j] * 256;
552 ScratchBuf = (WORD *)Malloc1(ScratchSize[j]*sizeof(WORD),(char *)(scratchname[j]));
553 AR.Fscr[j].POsize = ScratchSize[j] * sizeof(WORD);
554 AR.Fscr[j].POfull = AR.Fscr[j].POfill = AR.Fscr[j].PObuffer = ScratchBuf;
555 AR.Fscr[j].POstop = AR.Fscr[j].PObuffer + ScratchSize[j];
556 PUTZERO(AR.Fscr[j].POposition);
557 AR.Fscr[j].pthreadslock = dummylock;
558 AR.Fscr[j].wPOsize = AR.Fscr[j].POsize;
559 AR.Fscr[j].wPObuffer = AR.Fscr[j].PObuffer;
560 AR.Fscr[j].wPOfill = AR.Fscr[j].POfill;
561 AR.Fscr[j].wPOfull = AR.Fscr[j].POfull;
562 AR.Fscr[j].wPOstop = AR.Fscr[j].POstop;
563 }
564 AR.InInBuf = 0;
565 AR.InHiBuf = 0;
566 AR.Fscr[0].handle = -1;
567 AR.Fscr[1].handle = -1;
568 AR.Fscr[2].handle = -1;
569 AR.FoStage4[0].handle = -1;
570 AR.FoStage4[1].handle = -1;
571 IOsize = AM.S0->file.POsize;
572#ifdef WITHZLIB
573 AR.FoStage4[0].ziosize = IOsize;
574 AR.FoStage4[1].ziosize = IOsize;
575 AR.FoStage4[0].ziobuffer = 0;
576 AR.FoStage4[1].ziobuffer = 0;
577#endif
578 AR.FoStage4[0].POsize = ((IOsize+sizeof(WORD)-1)/sizeof(WORD))*sizeof(WORD);
579 AR.FoStage4[1].POsize = ((IOsize+sizeof(WORD)-1)/sizeof(WORD))*sizeof(WORD);
580
581 AR.hidefile = &(AR.Fscr[2]);
582 AR.StoreData.Handle = -1;
583 AR.SortType = AC.SortType;
584
585 AN.IndDum = AM.IndDum;
586
587 if ( identity > 0 ) {
588 s = (UBYTE *)(FG.fname); i = 0;
589 while ( *s ) { s++; i++; }
590 s = (UBYTE *)Malloc1(sizeof(char)*(i+12),"name for Fscr[0] file");
591 snprintf((char *)s,i+12,"%s.%d",FG.fname,identity);
592 s[i-3] = 's'; s[i-2] = 'c'; s[i-1] = '0';
593 AR.Fscr[0].name = (char *)s;
594 s = (UBYTE *)(FG.fname); i = 0;
595 while ( *s ) { s++; i++; }
596 s = (UBYTE *)Malloc1(sizeof(char)*(i+12),"name for Fscr[1] file");
597 snprintf((char *)s,i+12,"%s.%d",FG.fname,identity);
598 s[i-3] = 's'; s[i-2] = 'c'; s[i-1] = '1';
599 AR.Fscr[1].name = (char *)s;
600 }
601
602 AR.CompressBuffer = (WORD *)Malloc1((AM.CompressSize+10)*sizeof(WORD),"compresssize");
603 AR.ComprTop = AR.CompressBuffer + AM.CompressSize;
604 AR.CompareRoutine = (COMPAREDUMMY)(&Compare1);
605/*
606 Here we make all allocations for the struct AT
607 (which is AB[identity].T or B->T with B = AB+identity).
608*/
609 AT.WorkSpace = (WORD *)Malloc1(AM.WorkSize*sizeof(WORD),"WorkSpace");
610 AT.WorkTop = AT.WorkSpace + AM.WorkSize;
611 AT.WorkPointer = AT.WorkSpace;
612
613 AT.Nest = (NESTING)Malloc1((LONG)sizeof(struct NeStInG)*AM.maxFlevels,"functionlevels");
614 AT.NestStop = AT.Nest + AM.maxFlevels;
615 AT.NestPoin = AT.Nest;
616
617 AT.WildMask = (WORD *)Malloc1((LONG)AM.MaxWildcards*sizeof(WORD),"maxwildcards");
618
619 LOCK(availabilitylock);
620 AT.ebufnum = inicbufs(); /* Buffer for extras during execution */
621 AT.fbufnum = inicbufs(); /* Buffer for caching in factorization */
622 AT.allbufnum = inicbufs(); /* Buffer for id,all */
623 AT.aebufnum = inicbufs(); /* Buffer for id,all */
624 UNLOCK(availabilitylock);
625
626 AT.RepCount = (int *)Malloc1((LONG)((AM.RepMax+3)*sizeof(int)),"repeat buffers");
627 AN.RepPoint = AT.RepCount;
628 AN.polysortflag = 0;
629 AN.subsubveto = 0;
630 AN.tryterm = 0;
631 AT.RepTop = AT.RepCount + AM.RepMax;
632
633 AT.WildArgTaken = (WORD *)Malloc1((LONG)AC.WildcardBufferSize*sizeof(WORD)/2
634 ,"argument list names");
635 AT.WildcardBufferSize = AC.WildcardBufferSize;
636 AT.previousEfactor = 0;
637
638 AT.identity = identity;
639 AT.LoadBalancing = 0;
640/*
641 Still to do: the SS stuff.
642 the Fscr[3]
643 the FoStage4[2]
644*/
645 if ( AT.WorkSpace == 0 ||
646 AT.Nest == 0 ||
647 AT.WildMask == 0 ||
648 AT.RepCount == 0 ||
649 AT.WildArgTaken == 0 ) goto OnError;
650/*
651 And initializations
652*/
653 AT.comsym[0] = 8;
654 AT.comsym[1] = SYMBOL;
655 AT.comsym[2] = 4;
656 AT.comsym[3] = 0;
657 AT.comsym[4] = 1;
658 AT.comsym[5] = 1;
659 AT.comsym[6] = 1;
660 AT.comsym[7] = 3;
661 AT.comnum[0] = 4;
662 AT.comnum[1] = 1;
663 AT.comnum[2] = 1;
664 AT.comnum[3] = 3;
665 AT.comfun[0] = FUNHEAD+4;
666 AT.comfun[1] = FUNCTION;
667 AT.comfun[2] = FUNHEAD;
668 AT.comfun[3] = 0;
669#if FUNHEAD > 3
670 for ( i = 4; i <= FUNHEAD; i++ )
671 AT.comfun[i] = 0;
672#endif
673 AT.comfun[FUNHEAD+1] = 1;
674 AT.comfun[FUNHEAD+2] = 1;
675 AT.comfun[FUNHEAD+3] = 3;
676 AT.comind[0] = 7;
677 AT.comind[1] = INDEX;
678 AT.comind[2] = 3;
679 AT.comind[3] = 0;
680 AT.comind[4] = 1;
681 AT.comind[5] = 1;
682 AT.comind[6] = 3;
683 AT.locwildvalue[0] = SUBEXPRESSION;
684 AT.locwildvalue[1] = SUBEXPSIZE;
685 for ( i = 2; i < SUBEXPSIZE; i++ ) AT.locwildvalue[i] = 0;
686 AT.mulpat[0] = TYPEMULT;
687 AT.mulpat[1] = SUBEXPSIZE+3;
688 AT.mulpat[2] = 0;
689 AT.mulpat[3] = SUBEXPRESSION;
690 AT.mulpat[4] = SUBEXPSIZE;
691 AT.mulpat[5] = 0;
692 AT.mulpat[6] = 1;
693 for ( i = 7; i < SUBEXPSIZE+5; i++ ) AT.mulpat[i] = 0;
694 AT.proexp[0] = SUBEXPSIZE+4;
695 AT.proexp[1] = EXPRESSION;
696 AT.proexp[2] = SUBEXPSIZE;
697 AT.proexp[3] = -1;
698 AT.proexp[4] = 1;
699 for ( i = 5; i < SUBEXPSIZE+1; i++ ) AT.proexp[i] = 0;
700 AT.proexp[SUBEXPSIZE+1] = 1;
701 AT.proexp[SUBEXPSIZE+2] = 1;
702 AT.proexp[SUBEXPSIZE+3] = 3;
703 AT.proexp[SUBEXPSIZE+4] = 0;
704 AT.dummysubexp[0] = SUBEXPRESSION;
705 AT.dummysubexp[1] = SUBEXPSIZE+4;
706 for ( i = 2; i < SUBEXPSIZE; i++ ) AT.dummysubexp[i] = 0;
707 AT.dummysubexp[SUBEXPSIZE] = WILDDUMMY;
708 AT.dummysubexp[SUBEXPSIZE+1] = 4;
709 AT.dummysubexp[SUBEXPSIZE+2] = 0;
710 AT.dummysubexp[SUBEXPSIZE+3] = 0;
711
712 AT.MinVecArg[0] = 7+ARGHEAD;
713 AT.MinVecArg[ARGHEAD] = 7;
714 AT.MinVecArg[1+ARGHEAD] = INDEX;
715 AT.MinVecArg[2+ARGHEAD] = 3;
716 AT.MinVecArg[3+ARGHEAD] = 0;
717 AT.MinVecArg[4+ARGHEAD] = 1;
718 AT.MinVecArg[5+ARGHEAD] = 1;
719 AT.MinVecArg[6+ARGHEAD] = -3;
720 t = AT.FunArg;
721 *t++ = 4+ARGHEAD+FUNHEAD;
722 for ( i = 1; i < ARGHEAD; i++ ) *t++ = 0;
723 *t++ = 4+FUNHEAD;
724 *t++ = 0;
725 *t++ = FUNHEAD;
726 for ( i = 2; i < FUNHEAD; i++ ) *t++ = 0;
727 *t++ = 1; *t++ = 1; *t++ = 3;
728
729 AT.inprimelist = -1;
730 AT.sizeprimelist = 0;
731 AT.primelist = 0;
732 AT.nfac = AT.nBer = 0;
733 AT.factorials = 0;
734 AT.bernoullis = 0;
735 AR.wranfia = 0;
736 AR.wranfcall = 0;
737 AR.wranfnpair1 = NPAIR1;
738 AR.wranfnpair2 = NPAIR2;
739 AR.wranfseed = 0;
740
741 AT.NormData = Malloc1(sizeof(*(AT.NormData)), "NormData thread pointers");
742 AT.NormDataSize = 1;
743 AT.NormData[0] = AllocNormData();
744 AT.NormDepth = 0;
745
746 AN.SplitScratch = 0;
747 AN.SplitScratchSize = AN.InScratch = 0;
748 AN.SplitScratch1 = 0;
749 AN.SplitScratchSize1 = AN.InScratch1 = 0;
750/*
751 Now the sort buffers. They depend on which thread. The master
752 inherits the sortbuffer from AM.S0
753*/
754 if ( identity == 0 ) {
755 AT.S0 = AM.S0;
756 }
757 else {
758/*
759 For the moment we don't have special settings.
760 They may become costly in virtual memory.
761*/
762 AT.S0 = AllocSort(AM.S0->LargeSize*sizeof(WORD)/numberofworkers
763 ,AM.S0->SmallSize*sizeof(WORD)/numberofworkers
764 ,AM.S0->SmallEsize*sizeof(WORD)/numberofworkers
765 ,AM.S0->TermsInSmall
766 ,AM.S0->MaxPatches
767/* ,AM.S0->MaxPatches/numberofworkers */
768 ,AM.S0->MaxFpatches/numberofworkers
769 ,AM.S0->file.POsize
770 ,0);
771 }
772 AR.CompressPointer = AR.CompressBuffer;
773/*
774 Install the store caches (15-aug-2006 JV)
775*/
776 AT.StoreCache = AT.StoreCacheAlloc = 0;
777 if ( AM.NumStoreCaches > 0 ) {
778 STORECACHE sa, sb;
779 LONG size;
780 size = sizeof(struct StOrEcAcHe)+AM.SizeStoreCache;
781 size = ((size-1)/sizeof(size_t)+1)*sizeof(size_t);
782 AT.StoreCacheAlloc = (STORECACHE)Malloc1(size*AM.NumStoreCaches,"StoreCaches");
783 sa = AT.StoreCache = AT.StoreCacheAlloc;
784 for ( i = 0; i < AM.NumStoreCaches; i++ ) {
785 sb = (STORECACHE)(void *)((UBYTE *)sa+size);
786 if ( i == AM.NumStoreCaches-1 ) {
787 sa->next = 0;
788 }
789 else {
790 sa->next = sb;
791 }
792 SETBASEPOSITION(sa->position,-1);
793 SETBASEPOSITION(sa->toppos,-1);
794 sa = sb;
795 }
796 }
797
798 ReserveTempFiles(2);
799 return(B);
800OnError:;
801 MLOCK(ErrorMessageLock);
802 MesPrint("Error initializing thread %d",identity);
803 MUNLOCK(ErrorMessageLock);
804 Terminate(-1);
805 return(B);
806}
807
808/*
809 #] InitializeOneThread :
810 #[ FinalizeOneThread :
811*/
822void FinalizeOneThread(int identity)
823{
824 timerinfo[identity] = TimeCPU(1);
825}
826
827/*
828 #] FinalizeOneThread :
829 #[ ClearAllThreads :
830*/
837void ClearAllThreads(void)
838{
839 int i;
840 MasterWaitAll();
841 for ( i = 1; i <= numberofworkers; i++ ) {
842 WakeupThread(i,CLEARCLOCK);
843 }
844#ifdef WITHSORTBOTS
845 for ( i = numberofworkers+1; i <= numberofworkers+numberofsortbots; i++ ) {
846 WakeupThread(i,CLEARCLOCK);
847 }
848#endif
849}
850
851/*
852 #] ClearAllThreads :
853 #[ TerminateAllThreads :
854*/
861void TerminateAllThreads(void)
862{
863 int i;
864 for ( i = 1; i <= numberofworkers; i++ ) {
865 GetThread(i);
866 WakeupThread(i,TERMINATETHREAD);
867 }
868#ifdef WITHSORTBOTS
869 for ( i = numberofworkers+1; i <= numberofworkers+numberofsortbots; i++ ) {
870 WakeupThread(i,TERMINATETHREAD);
871 }
872#endif
873 for ( i = 1; i <= numberofworkers; i++ ) {
874 pthread_join(threadpointers[i],NULL);
875 }
876#ifdef WITHSORTBOTS
877 for ( i = numberofworkers+1; i <= numberofworkers+numberofsortbots; i++ ) {
878 pthread_join(threadpointers[i],NULL);
879 }
880#endif
881}
882
883/*
884 #] TerminateAllThreads :
885 #[ MakeThreadBuckets :
886*/
912int MakeThreadBuckets(int number, int par)
913{
914 int i;
915 LONG sizethreadbuckets;
916 THREADBUCKET *thr;
917/*
918 First we need a decent estimate. Not all terms should be maximal.
919 Note that AM.MaxTer is in bytes!!!
920 Maybe we should try to limit the size here a bit more effectively.
921 This is a great consumer of memory.
922*/
923 sizethreadbuckets = ( AC.ThreadBucketSize + 1 ) * AM.MaxTer + 2*sizeof(WORD);
924 if ( AC.ThreadBucketSize >= 250 ) sizethreadbuckets /= 4;
925 else if ( AC.ThreadBucketSize >= 90 ) sizethreadbuckets /= 3;
926 else if ( AC.ThreadBucketSize >= 40 ) sizethreadbuckets /= 2;
927 sizethreadbuckets /= sizeof(WORD);
928
929 if ( par == 0 ) {
930 numthreadbuckets = 2*(number-1);
931 threadbuckets = (THREADBUCKET **)Malloc1(numthreadbuckets*sizeof(THREADBUCKET *),"threadbuckets");
932 freebuckets = (THREADBUCKET **)Malloc1(numthreadbuckets*sizeof(THREADBUCKET *),"threadbuckets");
933 }
934 if ( par > 0 ) {
935 if ( sizethreadbuckets <= threadbuckets[0]->threadbuffersize ) return(0);
936 for ( i = 0; i < numthreadbuckets; i++ ) {
937 thr = threadbuckets[i];
938 M_free(thr->deferbuffer,"deferbuffer");
939 }
940 }
941 else {
942 for ( i = 0; i < numthreadbuckets; i++ ) {
943 threadbuckets[i] = (THREADBUCKET *)Malloc1(sizeof(THREADBUCKET),"threadbuckets");
944 threadbuckets[i]->lock = dummylock;
945 }
946 }
947 for ( i = 0; i < numthreadbuckets; i++ ) {
948 thr = threadbuckets[i];
949 thr->threadbuffersize = sizethreadbuckets;
950 thr->free = BUCKETFREE;
951 thr->deferbuffer = (POSITION *)Malloc1(2*sizethreadbuckets*sizeof(WORD)
952 +(AC.ThreadBucketSize+1)*sizeof(POSITION),"deferbuffer");
953 thr->threadbuffer = (WORD *)(thr->deferbuffer+AC.ThreadBucketSize+1);
954 thr->compressbuffer = (WORD *)(thr->threadbuffer+sizethreadbuckets);
955 thr->busy = BUCKETPREPARINGTERM;
956 thr->usenum = thr->totnum = 0;
957 thr->type = BUCKETDOINGTERMS;
958 }
959 return(0);
960}
961
962/*
963 #] MakeThreadBuckets :
964 #[ GetTimerInfo :
965*/
966
972int GetTimerInfo(LONG** ti,LONG** sti)
973{
974 *ti = timerinfo;
975 *sti = sumtimerinfo;
976#ifdef WITHSORTBOTS
977 return AM.totalnumberofthreads*2;
978#else
979 return AM.totalnumberofthreads;
980#endif
981}
982
983/*
984 #] GetTimerInfo :
985 #[ WriteTimerInfo :
986*/
987
992void WriteTimerInfo(LONG* ti,LONG* sti)
993{
994 int i;
995#ifdef WITHSORTBOTS
996 int max = AM.totalnumberofthreads*2;
997#else
998 int max = AM.totalnumberofthreads;
999#endif
1000 for ( i=0; i<max; ++i ) {
1001 timerinfo[i] = ti[i];
1002 sumtimerinfo[i] = sti[i];
1003 }
1004}
1005
1006/*
1007 #] WriteTimerInfo :
1008 #[ GetWorkerTimes :
1009*/
1015LONG GetWorkerTimes(void)
1016{
1017 LONG retval = 0;
1018 int i;
1019 for ( i = 1; i <= numberofworkers; i++ ) retval += timerinfo[i] + sumtimerinfo[i];
1020#ifdef WITHSORTBOTS
1021 for ( i = numberofworkers+1; i <= numberofworkers+numberofsortbots; i++ )
1022 retval += timerinfo[i] + sumtimerinfo[i];
1023#endif
1024 return(retval);
1025}
1026
1027/*
1028 #] GetWorkerTimes :
1029 #[ UpdateOneThread :
1030*/
1037int UpdateOneThread(int identity)
1038{
1039 ALLPRIVATES *B = AB[identity], *B0 = AB[0];
1040 AR.GetFile = AR0.GetFile;
1041 AR.KeptInHold = AR0.KeptInHold;
1042 AR.CurExpr = AR0.CurExpr;
1043 AR.SortType = AC.SortType;
1044 if ( AT.WildcardBufferSize < AC.WildcardBufferSize ) {
1045 M_free(AT.WildArgTaken,"argument list names");
1046 AT.WildcardBufferSize = AC.WildcardBufferSize;
1047 AT.WildArgTaken = (WORD *)Malloc1((LONG)AC.WildcardBufferSize*sizeof(WORD)/2
1048 ,"argument list names");
1049 if ( AT.WildArgTaken == 0 ) return(-1);
1050 }
1051 return(0);
1052}
1053
1054/*
1055 #] UpdateOneThread :
1056 #[ LoadOneThread :
1057*/
1071int LoadOneThread(int from, int identity, THREADBUCKET *thr, int par)
1072{
1073 WORD *t1, *t2;
1074 ALLPRIVATES *B = AB[identity], *B0 = AB[from];
1075
1076 AR.DefPosition = AR0.DefPosition;
1077 AR.NoCompress = AR0.NoCompress;
1078 AR.gzipCompress = AR0.gzipCompress;
1079 AR.BracketOn = AR0.BracketOn;
1080 AR.CurDum = AR0.CurDum;
1081 AR.DeferFlag = AR0.DeferFlag;
1082 AR.TePos = 0;
1083 AR.sLevel = AR0.sLevel;
1084 AR.Stage4Name = AR0.Stage4Name;
1085 AR.GetOneFile = AR0.GetOneFile;
1086 AR.PolyFun = AR0.PolyFun;
1087 AR.PolyFunInv = AR0.PolyFunInv;
1088 AR.PolyFunType = AR0.PolyFunType;
1089 AR.PolyFunExp = AR0.PolyFunExp;
1090 AR.PolyFunVar = AR0.PolyFunVar;
1091 AR.PolyFunPow = AR0.PolyFunPow;
1092 AR.Eside = AR0.Eside;
1093 AR.Cnumlhs = AR0.Cnumlhs;
1094/*
1095 AR.MaxBracket = AR0.MaxBracket;
1096
1097 The compressbuffer contents are mainly relevant for keep brackets
1098 We should do this only if there is a keep brackets statement
1099 We may however still need the compressbuffer for expressions in the rhs.
1100*/
1101 if ( par >= 1 ) {
1102/*
1103 We may not need this %%%%% 7-apr-2006
1104*/
1105 t1 = AR.CompressBuffer; t2 = AR0.CompressBuffer;
1106 while ( t2 < AR0.CompressPointer ) *t1++ = *t2++;
1107 AR.CompressPointer = t1;
1108
1109 }
1110 else {
1111 AR.CompressPointer = AR.CompressBuffer;
1112 }
1113 if ( AR.DeferFlag ) {
1114 if ( AR.infile->handle < 0 ) {
1115 AR.infile->POfill = AR0.infile->POfill;
1116 }
1117 else {
1118/*
1119 We have to set the value of POposition to something that will
1120 force a read in the first try.
1121*/
1122 AR.infile->POfull = AR.infile->POfill = AR.infile->PObuffer;
1123 }
1124 }
1125 if ( par == 0 ) {
1126 AN.threadbuck = thr;
1127 AN.ninterms = thr->firstterm;
1128 }
1129 else if ( par == 1 ) {
1130 WORD *tstop;
1131 t1 = thr->threadbuffer; tstop = t1 + *t1;
1132 t2 = AT.WorkPointer;
1133 while ( t1 < tstop ) *t2++ = *t1++;
1134 AN.ninterms = thr->firstterm;
1135 }
1136 AN.TeInFun = 0;
1137 AN.ncmod = AC.ncmod;
1138 AT.BrackBuf = AT0.BrackBuf;
1139 AT.bracketindexflag = AT0.bracketindexflag;
1140 AN.PolyFunTodo = 0;
1141/*
1142 The relevant variables and the term are in their place.
1143 There is nothing more to do.
1144*/
1145 return(0);
1146}
1147
1148/*
1149 #] LoadOneThread :
1150 #[ BalanceRunThread :
1151*/
1167int BalanceRunThread(PHEAD int identity, WORD *term, WORD level)
1168{
1169 GETBIDENTITY
1170 ALLPRIVATES *BB;
1171 WORD *t, *tt;
1172 int i, *ti, *tti;
1173
1174 LoadOneThread(AT.identity,identity,0,2);
1175/*
1176 Extra loading if needed. Quantities changed in Generator.
1177 Like the level that has to be passed.
1178*/
1179 BB = AB[identity];
1180 BB->R.level = level;
1181 BB->T.TMbuff = AT.TMbuff;
1182 ti = AT.RepCount; tti = BB->T.RepCount;
1183 i = AN.RepPoint - AT.RepCount;
1184 BB->N.RepPoint = BB->T.RepCount + i;
1185 for ( ; i >= 0; i-- ) tti[i] = ti[i];
1186
1187 t = term; i = *term;
1188 tt = BB->T.WorkSpace;
1189 NCOPY(tt,t,i);
1190 BB->T.WorkPointer = tt;
1191
1192 WakeupThread(identity,HIGHERLEVELGENERATION);
1193
1194 return(0);
1195}
1196
1197/*
1198 #] BalanceRunThread :
1199 #[ SetWorkerFiles :
1200*/
1205void SetWorkerFiles(void)
1206{
1207 int id;
1208 ALLPRIVATES *B, *B0 = AB[0];
1209 for ( id = 1; id < AM.totalnumberofthreads; id++ ) {
1210 B = AB[id];
1211 AR.infile = &(AR.Fscr[0]);
1212 AR.outfile = &(AR.Fscr[1]);
1213 AR.hidefile = &(AR.Fscr[2]);
1214 AR.infile->handle = AR0.infile->handle;
1215 AR.hidefile->handle = AR0.hidefile->handle;
1216 if ( AR.infile->handle < 0 ) {
1217 AR.infile->PObuffer = AR0.infile->PObuffer;
1218 AR.infile->POstop = AR0.infile->POstop;
1219 AR.infile->POfill = AR0.infile->POfill;
1220 AR.infile->POfull = AR0.infile->POfull;
1221 AR.infile->POsize = AR0.infile->POsize;
1222 AR.InInBuf = AR0.InInBuf;
1223 AR.infile->POposition = AR0.infile->POposition;
1224 AR.infile->filesize = AR0.infile->filesize;
1225 }
1226 else {
1227 AR.infile->PObuffer = AR.infile->wPObuffer;
1228 AR.infile->POstop = AR.infile->wPOstop;
1229 AR.infile->POfill = AR.infile->wPOfill;
1230 AR.infile->POfull = AR.infile->wPOfull;
1231 AR.infile->POsize = AR.infile->wPOsize;
1232 AR.InInBuf = 0;
1233 PUTZERO(AR.infile->POposition);
1234 }
1235/*
1236 If there is some writing, it betters happens to ones own outfile.
1237 Currently this is to be done only for InParallel.
1238 Merging of the outputs is then done by the CopyExpression routine.
1239*/
1240 {
1241 AR.outfile->PObuffer = AR.outfile->wPObuffer;
1242 AR.outfile->POstop = AR.outfile->wPOstop;
1243 AR.outfile->POfill = AR.outfile->wPOfill;
1244 AR.outfile->POfull = AR.outfile->wPOfull;
1245 AR.outfile->POsize = AR.outfile->wPOsize;
1246 PUTZERO(AR.outfile->POposition);
1247 }
1248 if ( AR.hidefile->handle < 0 ) {
1249 AR.hidefile->PObuffer = AR0.hidefile->PObuffer;
1250 AR.hidefile->POstop = AR0.hidefile->POstop;
1251 AR.hidefile->POfill = AR0.hidefile->POfill;
1252 AR.hidefile->POfull = AR0.hidefile->POfull;
1253 AR.hidefile->POsize = AR0.hidefile->POsize;
1254 AR.InHiBuf = AR0.InHiBuf;
1255 AR.hidefile->POposition = AR0.hidefile->POposition;
1256 AR.hidefile->filesize = AR0.hidefile->filesize;
1257 }
1258 else {
1259 AR.hidefile->PObuffer = AR.hidefile->wPObuffer;
1260 AR.hidefile->POstop = AR.hidefile->wPOstop;
1261 AR.hidefile->POfill = AR.hidefile->wPOfill;
1262 AR.hidefile->POfull = AR.hidefile->wPOfull;
1263 AR.hidefile->POsize = AR.hidefile->wPOsize;
1264 AR.InHiBuf = 0;
1265 PUTZERO(AR.hidefile->POposition);
1266 }
1267 }
1268 if ( AR0.StoreData.dirtyflag ) {
1269 for ( id = 1; id < AM.totalnumberofthreads; id++ ) {
1270 B = AB[id];
1271 AR.StoreData = AR0.StoreData;
1272 }
1273 }
1274}
1275
1276/*
1277 #] SetWorkerFiles :
1278 #[ RunThread :
1279*/
1287void *RunThread(void *dummy)
1288{
1289 WORD *term, *ttin, *tt, *ttco, *oldwork;
1290 int identity, wakeupsignal, identityretv, i, tobereleased, errorcode;
1291 ALLPRIVATES *B;
1292 THREADBUCKET *thr;
1293 POSITION *ppdef;
1294 EXPRESSIONS e;
1295 DUMMYUSE(dummy);
1296 identity = SetIdentity(&identityretv);
1297 threadpointers[identity] = pthread_self();
1298 B = InitializeOneThread(identity);
1299 while ( ( wakeupsignal = ThreadWait(identity) ) > 0 ) {
1300 switch ( wakeupsignal ) {
1301/*
1302 #[ STARTNEWEXPRESSION :
1303*/
1304 case STARTNEWEXPRESSION:
1305/*
1306 Set up the sort routines etc.
1307 Start with getting some buffers synchronized with the compiler
1308*/
1309 if ( UpdateOneThread(identity) ) {
1310 MLOCK(ErrorMessageLock);
1311 MesPrint("Update error in starting expression in thread %d in module %d",identity,AC.CModule);
1312 MUNLOCK(ErrorMessageLock);
1313 Terminate(-1);
1314 }
1315 AR.DeferFlag = AC.ComDefer;
1316 AR.sLevel = AS.sLevel;
1317 AR.MaxDum = AM.IndDum;
1318 AR.expchanged = AB[0]->R.expchanged;
1319 AR.expflags = AB[0]->R.expflags;
1320 AR.PolyFun = AB[0]->R.PolyFun;
1321 AR.PolyFunInv = AB[0]->R.PolyFunInv;
1322 AR.PolyFunType = AB[0]->R.PolyFunType;
1323 AR.PolyFunExp = AB[0]->R.PolyFunExp;
1324 AR.PolyFunVar = AB[0]->R.PolyFunVar;
1325 AR.PolyFunPow = AB[0]->R.PolyFunPow;
1326/*
1327 Now fire up the sort buffer.
1328*/
1329 NewSort(BHEAD0);
1330 break;
1331/*
1332 #] STARTNEWEXPRESSION :
1333 #[ LOWESTLEVELGENERATION :
1334*/
1335 case LOWESTLEVELGENERATION:
1336#ifdef INNERTEST
1337 if ( AC.InnerTest ) {
1338 if ( StrCmp(AC.TestValue,(UBYTE *)INNERTEST) == 0 ) {
1339 MesPrint("Testing(Worker%d): value = %s",AT.identity,AC.TestValue);
1340 }
1341 }
1342#endif
1343 e = Expressions + AR.CurExpr;
1344 thr = AN.threadbuck;
1345 ppdef = thr->deferbuffer;
1346 ttin = thr->threadbuffer;
1347 ttco = thr->compressbuffer;
1348 term = AT.WorkPointer;
1349 thr->usenum = 0;
1350 tobereleased = 0;
1351 AN.inputnumber = thr->firstterm;
1352 AN.ninterms = thr->firstterm;
1353 do {
1354 thr->usenum++; /* For if the master wants to steal the bucket */
1355 tt = term; i = *ttin;
1356 NCOPY(tt,ttin,i);
1357 AT.WorkPointer = tt;
1358 if ( AR.DeferFlag ) {
1359 tt = AR.CompressBuffer; i = *ttco;
1360 NCOPY(tt,ttco,i);
1361 AR.CompressPointer = tt;
1362 AR.DefPosition = ppdef[0]; ppdef++;
1363 }
1364 if ( thr->free == BUCKETTERMINATED ) {
1365/*
1366 The next statement allows the master to steal the bucket
1367 for load balancing purposes. We do still execute the current
1368 term, but afterwards we drop out.
1369 Once we have written the release code, we cannot use this
1370 bucket anymore. Hence the exit to the label bucketstolen.
1371*/
1372 if ( thr->usenum == thr->totnum ) {
1373 thr->free = BUCKETCOMINGFREE;
1374 }
1375 else {
1376 thr->free = BUCKETRELEASED;
1377 tobereleased = 1;
1378 }
1379 }
1380/*
1381 What if we want to steal and we set thr->free while
1382 the thread is inside the next code for a long time?
1383 if ( AT.LoadBalancing ) {
1384*/
1385 LOCK(thr->lock);
1386 thr->busy = BUCKETDOINGTERM;
1387 UNLOCK(thr->lock);
1388/*
1389 }
1390 else {
1391 thr->busy = BUCKETDOINGTERM;
1392 }
1393*/
1394 AN.RepPoint = AT.RepCount + 1;
1395
1396 if ( ( e->vflags & ISFACTORIZED ) != 0 && term[1] == HAAKJE ) {
1397 StoreTerm(BHEAD term);
1398 }
1399 else {
1400 if ( AR.DeferFlag ) {
1401 AR.CurDum = AN.IndDum = Expressions[AR.CurExpr].numdummies + AM.IndDum;
1402 }
1403 else {
1404 AN.IndDum = AM.IndDum;
1405 AR.CurDum = ReNumber(BHEAD term);
1406 }
1407 if ( AC.SymChangeFlag ) MarkDirty(term,DIRTYSYMFLAG);
1408 if ( AN.ncmod ) {
1409 if ( ( AC.modmode & ALSOFUNARGS ) != 0 ) MarkDirty(term,DIRTYFLAG);
1410 else if ( AR.PolyFun ) PolyFunDirty(BHEAD term);
1411 }
1412 else if ( AC.PolyRatFunChanged ) PolyFunDirty(BHEAD term);
1413 if ( ( AP.PreDebug & THREADSDEBUG ) != 0 ) {
1414 MLOCK(ErrorMessageLock);
1415 MesPrint("Thread %w executing term:");
1416 PrintTerm(term,"LLG");
1417 MUNLOCK(ErrorMessageLock);
1418 }
1419 if ( ( AR.PolyFunType == 2 ) && ( AC.PolyRatFunChanged == 0 )
1420 && ( e->status == LOCALEXPRESSION || e->status == GLOBALEXPRESSION ) ) {
1421 PolyFunClean(BHEAD term);
1422 }
1423 if ( Generator(BHEAD term,0) ) {
1425 MLOCK(ErrorMessageLock);
1426 MesPrint("Error in processing one term in thread %d in module %d",identity,AC.CModule);
1427 MUNLOCK(ErrorMessageLock);
1428 Terminate(-1);
1429 }
1430 AN.ninterms++;
1431 }
1432/* if ( AT.LoadBalancing ) { */
1433 LOCK(thr->lock);
1434 thr->busy = BUCKETPREPARINGTERM;
1435 UNLOCK(thr->lock);
1436/*
1437 }
1438 else {
1439 thr->busy = BUCKETPREPARINGTERM;
1440 }
1441*/
1442 if ( thr->free == BUCKETTERMINATED ) {
1443 if ( thr->usenum == thr->totnum ) {
1444 thr->free = BUCKETCOMINGFREE;
1445 }
1446 else {
1447 thr->free = BUCKETRELEASED;
1448 tobereleased = 1;
1449 }
1450 }
1451 if ( tobereleased ) goto bucketstolen;
1452 } while ( *ttin );
1453 thr->free = BUCKETCOMINGFREE;
1454bucketstolen:;
1455/* if ( AT.LoadBalancing ) { */
1456 LOCK(thr->lock);
1457 thr->busy = BUCKETTOBERELEASED;
1458 UNLOCK(thr->lock);
1459/* }
1460 else {
1461 thr->busy = BUCKETTOBERELEASED;
1462 }
1463*/
1464 AT.WorkPointer = term;
1465 break;
1466/*
1467 #] LOWESTLEVELGENERATION :
1468 #[ FINISHEXPRESSION :
1469*/
1470#ifdef WITHSORTBOTS
1471 case CLAIMOUTPUT:
1472 LOCK(AT.SB.MasterBlockLock[1]);
1473 break;
1474#endif
1475 case FINISHEXPRESSION:
1476/*
1477 Finish the sort
1478
1479 Start with claiming the first block
1480 Once we have claimed it we can let the master know that
1481 everything is all right.
1482*/
1483 LOCK(AT.SB.MasterBlockLock[1]);
1484 ThreadClaimedBlock(identity);
1485/*
1486 Entry for when we work with sortbots
1487*/
1488#ifdef WITHSORTBOTS
1489 /* fall through */
1490 case FINISHEXPRESSION2:
1491#endif
1492/*
1493 Now we may need here an fsync on the sort file
1494*/
1495 if ( AC.ThreadSortFileSynch ) {
1496 if ( AT.S0->file.handle >= 0 ) {
1497 SynchFile(AT.S0->file.handle);
1498 }
1499 }
1500 AT.SB.FillBlock = 1;
1501 AT.SB.MasterFill[1] = AT.SB.MasterStart[1];
1502 errorcode = EndSort(BHEAD AT.S0->sBuffer,0);
1503 UNLOCK(AT.SB.MasterBlockLock[AT.SB.FillBlock]);
1504 UpdateMaxSize();
1505 if ( errorcode ) {
1506 MLOCK(ErrorMessageLock);
1507 MesPrint("Error terminating sort in thread %d in module %d",identity,AC.CModule);
1508 MUNLOCK(ErrorMessageLock);
1509 Terminate(-1);
1510 }
1511 break;
1512/*
1513 #] FINISHEXPRESSION :
1514 #[ CLEANUPEXPRESSION :
1515*/
1516 case CLEANUPEXPRESSION:
1517/*
1518 Cleanup everything and wait for the next expression
1519*/
1520 if ( AR.outfile->handle >= 0 ) {
1521 CloseFile(AR.outfile->handle);
1522 AR.outfile->handle = -1;
1523 remove(AR.outfile->name);
1524 AR.outfile->POfill = AR.outfile->POfull = AR.outfile->PObuffer;
1525 PUTZERO(AR.outfile->POposition);
1526 PUTZERO(AR.outfile->filesize);
1527 }
1528 else {
1529 AR.outfile->POfill = AR.outfile->POfull = AR.outfile->PObuffer;
1530 PUTZERO(AR.outfile->POposition);
1531 PUTZERO(AR.outfile->filesize);
1532 }
1533 {
1534 CBUF *C = cbuf+AT.ebufnum;
1535 WORD **w, ii;
1536 if ( C->numrhs > 0 || C->numlhs > 0 ) {
1537 if ( C->rhs ) {
1538 w = C->rhs; ii = C->numrhs;
1539 do { *w++ = 0; } while ( --ii > 0 );
1540 }
1541 if ( C->lhs ) {
1542 w = C->lhs; ii = C->numlhs;
1543 do { *w++ = 0; } while ( --ii > 0 );
1544 }
1545 C->numlhs = C->numrhs = 0;
1546 ClearTree(AT.ebufnum);
1547 C->Pointer = C->Buffer;
1548 }
1549 }
1550 break;
1551/*
1552 #] CLEANUPEXPRESSION :
1553 #[ HIGHERLEVELGENERATION :
1554*/
1555 case HIGHERLEVELGENERATION:
1556/*
1557 When foliating halfway the tree.
1558 This should only be needed in a second level load balancing
1559*/
1560 term = AT.WorkSpace; AT.WorkPointer = term + *term;
1561 if ( Generator(BHEAD term,AR.level) ) {
1563 MLOCK(ErrorMessageLock);
1564 MesPrint("Error in load balancing one term at level %d in thread %d in module %d",AR.level,AT.identity,AC.CModule);
1565 MUNLOCK(ErrorMessageLock);
1566 Terminate(-1);
1567 }
1568 AT.WorkPointer = term;
1569 break;
1570/*
1571 #] HIGHERLEVELGENERATION :
1572 #[ STARTNEWMODULE :
1573*/
1574 case STARTNEWMODULE:
1575/*
1576 For resetting variables.
1577*/
1578 SpecialCleanup(B);
1579 break;
1580/*
1581 #] STARTNEWMODULE :
1582 #[ TERMINATETHREAD :
1583*/
1584 case TERMINATETHREAD:
1585 goto EndOfThread;
1586/*
1587 #] TERMINATETHREAD :
1588 #[ DOONEEXPRESSION :
1589
1590 When a thread has to do a complete (not too big) expression.
1591 The number of the expression to be done is in AR.exprtodo.
1592 The code is mostly taken from Processor. The only difference
1593 is with what to do with the output.
1594 The output should go to the scratch buffer of the worker
1595 (which is free at the right moment). If this buffer is too
1596 small we have a problem. We could write to file or give the
1597 master what we have and from now on the master has to collect
1598 pieces until things are complete.
1599 Note: this assumes that the expressions don't keep their order.
1600 If they have to keep their order, don't use this feature.
1601*/
1602 case DOONEEXPRESSION: {
1603
1604 POSITION position, outposition;
1605 FILEHANDLE *fi, *fout, *oldoutfile;
1606 LONG dd = 0;
1607 WORD oldBracketOn = AR.BracketOn;
1608 WORD *oldBrackBuf = AT.BrackBuf;
1609 WORD oldbracketindexflag = AT.bracketindexflag;
1610 WORD fromspectator = 0;
1611 e = Expressions + AR.exprtodo;
1612 i = AR.exprtodo;
1613 AR.CurExpr = i;
1614 AR.SortType = AC.SortType;
1615 AR.expchanged = 0;
1616 if ( ( e->vflags & ISFACTORIZED ) != 0 ) {
1617 AR.BracketOn = 1;
1618 AT.BrackBuf = AM.BracketFactors;
1619 AT.bracketindexflag = 1;
1620 }
1621
1622 position = AS.OldOnFile[i];
1623 if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION
1624 || e->status == UNHIDELEXPRESSION || e->status == UNHIDEGEXPRESSION ) {
1625 AR.GetFile = 2; fi = AR.hidefile;
1626 }
1627 else {
1628 AR.GetFile = 0; fi = AR.infile;
1629 }
1630/*
1631 PUTZERO(fi->POposition);
1632 if ( fi->handle >= 0 ) {
1633 fi->POfill = fi->POfull = fi->PObuffer;
1634 }
1635*/
1636 SetScratch(fi,&position);
1637 term = oldwork = AT.WorkPointer;
1638 AR.CompressPointer = AR.CompressBuffer;
1639 AR.CompressPointer[0] = 0;
1640 AR.KeptInHold = 0;
1641 if ( GetTerm(BHEAD term) <= 0 ) {
1642 MLOCK(ErrorMessageLock);
1643 MesPrint("Expression %d has problems in scratchfile (t)",i);
1644 MUNLOCK(ErrorMessageLock);
1645 Terminate(-1);
1646 }
1647 if ( AT.bracketindexflag > 0 ) OpenBracketIndex(i);
1648 term[3] = i;
1649 if ( term[5] < 0 ) {
1650 fromspectator = -term[5];
1651 PUTZERO(AM.SpectatorFiles[fromspectator-1].readpos);
1652 term[5] = AC.cbufnum;
1653 }
1654 PUTZERO(outposition);
1655 fout = AR.outfile;
1656 fout->POfill = fout->POfull = fout->PObuffer;
1657 fout->POposition = outposition;
1658 if ( fout->handle >= 0 ) {
1659 fout->POposition = outposition;
1660 }
1661/*
1662 The next statement is needed because we need the system
1663 to believe that the expression is at position zero for
1664 the moment. In this worker, with no memory of other expressions,
1665 it is. This is needed for when a bracket index is made
1666 because there e->onfile is an offset. Afterwards, when the
1667 expression is written to its final location in the masters
1668 output e->onfile will get its real value.
1669*/
1670 PUTZERO(e->onfile);
1671 if ( PutOut(BHEAD term,&outposition,fout,0) < 0 ) goto ProcErr;
1672
1673 AR.DeferFlag = AC.ComDefer;
1674
1675 AR.sLevel = AB[0]->R.sLevel;
1676 term = AT.WorkPointer;
1677 NewSort(BHEAD0);
1678 AR.MaxDum = AM.IndDum;
1679 AN.ninterms = 0;
1680 if ( fromspectator ) {
1681 while ( GetFromSpectator(term,fromspectator-1) ) {
1682 AT.WorkPointer = term + *term;
1683 AN.RepPoint = AT.RepCount + 1;
1684 AN.IndDum = AM.IndDum;
1685 AR.CurDum = ReNumber(BHEAD term);
1686 if ( AC.SymChangeFlag ) MarkDirty(term,DIRTYSYMFLAG);
1687 if ( AN.ncmod ) {
1688 if ( ( AC.modmode & ALSOFUNARGS ) != 0 ) MarkDirty(term,DIRTYFLAG);
1689 else if ( AR.PolyFun ) PolyFunDirty(BHEAD term);
1690 }
1691 else if ( AC.PolyRatFunChanged ) PolyFunDirty(BHEAD term);
1692 if ( ( AR.PolyFunType == 2 ) && ( AC.PolyRatFunChanged == 0 )
1693 && ( e->status == LOCALEXPRESSION || e->status == GLOBALEXPRESSION ) ) {
1694 PolyFunClean(BHEAD term);
1695 }
1696 if ( Generator(BHEAD term,0) ) {
1697 LowerSortLevel(); goto ProcErr;
1698 }
1699 }
1700 }
1701 else {
1702 while ( GetTerm(BHEAD term) ) {
1703 SeekScratch(fi,&position);
1704 AN.ninterms++; dd = AN.deferskipped;
1705 if ( ( e->vflags & ISFACTORIZED ) != 0 && term[1] == HAAKJE ) {
1706 StoreTerm(BHEAD term);
1707 }
1708 else {
1709 if ( AC.CollectFun && *term <= (AM.MaxTer/(2*(LONG)sizeof(WORD))) ) {
1710 if ( GetMoreTerms(term) < 0 ) {
1711 LowerSortLevel(); goto ProcErr;
1712 }
1713 SeekScratch(fi,&position);
1714 }
1715 AT.WorkPointer = term + *term;
1716 AN.RepPoint = AT.RepCount + 1;
1717 if ( AR.DeferFlag ) {
1718 AR.CurDum = AN.IndDum = Expressions[AR.exprtodo].numdummies;
1719 }
1720 else {
1721 AN.IndDum = AM.IndDum;
1722 AR.CurDum = ReNumber(BHEAD term);
1723 }
1724 if ( AC.SymChangeFlag ) MarkDirty(term,DIRTYSYMFLAG);
1725 if ( AN.ncmod ) {
1726 if ( ( AC.modmode & ALSOFUNARGS ) != 0 ) MarkDirty(term,DIRTYFLAG);
1727 else if ( AR.PolyFun ) PolyFunDirty(BHEAD term);
1728 }
1729 else if ( AC.PolyRatFunChanged ) PolyFunDirty(BHEAD term);
1730 if ( ( AR.PolyFunType == 2 ) && ( AC.PolyRatFunChanged == 0 )
1731 && ( e->status == LOCALEXPRESSION || e->status == GLOBALEXPRESSION ) ) {
1732 PolyFunClean(BHEAD term);
1733 }
1734 if ( Generator(BHEAD term,0) ) {
1735 LowerSortLevel(); goto ProcErr;
1736 }
1737 AN.ninterms += dd;
1738 }
1739 SetScratch(fi,&position);
1740 if ( fi == AR.hidefile ) {
1741 AR.InHiBuf = (fi->POfull-fi->PObuffer)
1742 -DIFBASE(position,fi->POposition)/sizeof(WORD);
1743 }
1744 else {
1745 AR.InInBuf = (fi->POfull-fi->PObuffer)
1746 -DIFBASE(position,fi->POposition)/sizeof(WORD);
1747 }
1748 }
1749 }
1750 AN.ninterms += dd;
1751 if ( EndSort(BHEAD AT.S0->sBuffer,0) < 0 ) goto ProcErr;
1752 e->numdummies = AR.MaxDum - AM.IndDum;
1753 AR.BracketOn = oldBracketOn;
1754 AT.BrackBuf = oldBrackBuf;
1755 if ( ( e->vflags & TOBEFACTORED ) != 0 )
1757 else if ( ( ( e->vflags & TOBEUNFACTORED ) != 0 )
1758 && ( ( e->vflags & ISFACTORIZED ) != 0 ) )
1760 if ( AT.S0->TermsLeft ) e->vflags &= ~ISZERO;
1761 else e->vflags |= ISZERO;
1762 if ( AR.expchanged == 0 ) e->vflags |= ISUNMODIFIED;
1763 if ( AT.S0->TermsLeft ) AR.expflags |= ISZERO;
1764 if ( AR.expchanged ) AR.expflags |= ISUNMODIFIED;
1765 AR.GetFile = 0;
1766 AT.bracketindexflag = oldbracketindexflag;
1767/*
1768 Now copy the whole thing from fout to AR0.outfile
1769 Do this in one go to keep the lock occupied as short as possible
1770*/
1771 SeekScratch(fout,&outposition);
1772 LOCK(AS.outputslock);
1773 oldoutfile = AB[0]->R.outfile;
1774 if ( e->status == INTOHIDELEXPRESSION || e->status == INTOHIDEGEXPRESSION ) {
1775 AB[0]->R.outfile = AB[0]->R.hidefile;
1776 }
1777 SeekScratch(AB[0]->R.outfile,&position);
1778 e->onfile = position;
1779 if ( CopyExpression(fout,AB[0]->R.outfile) < 0 ) {
1780 AB[0]->R.outfile = oldoutfile;
1781 UNLOCK(AS.outputslock);
1782 MLOCK(ErrorMessageLock);
1783 MesPrint("Error copying output of 'InParallel' expression to master. Thread: %d",identity);
1784 MUNLOCK(ErrorMessageLock);
1785 goto ProcErr;
1786 }
1787 AB[0]->R.outfile = oldoutfile;
1788 AB[0]->R.hidefile->POfull = AB[0]->R.hidefile->POfill;
1789 AB[0]->R.expflags = AR.expflags;
1790 UNLOCK(AS.outputslock);
1791
1792 if ( fout->handle >= 0 ) { /* Now get rid of the file */
1793 CloseFile(fout->handle);
1794 fout->handle = -1;
1795 remove(fout->name);
1796 PUTZERO(fout->POposition);
1797 PUTZERO(fout->filesize);
1798 fout->POfill = fout->POfull = fout->PObuffer;
1799 }
1800 UpdateMaxSize();
1801
1802 AT.WorkPointer = oldwork;
1803
1804 } break;
1805/*
1806 #] DOONEEXPRESSION :
1807 #[ DOBRACKETS :
1808
1809 In case we have a bracket index we can have the worker treat
1810 one or more of the entries in the bracket index.
1811 The advantage is that identical terms will meet each other
1812 sooner in the sorting and hence fewer compares will be needed.
1813 Also this way the master doesn't need to fill the buckets.
1814 The main problem is the load balancing which can become very
1815 bad when there is a long tail without things outside the bracket.
1816
1817 We get sent:
1818 1: The number of the first bracket to be done
1819 2: The number of the last bracket to be done
1820*/
1821 case DOBRACKETS: {
1822 BRACKETINFO *binfo;
1823 BRACKETINDEX *bi;
1824 FILEHANDLE *fi;
1825 POSITION stoppos,where;
1826 e = Expressions + AR.CurExpr;
1827 binfo = e->bracketinfo;
1828 thr = AN.threadbuck;
1829 bi = &(binfo->indexbuffer[thr->firstbracket]);
1830 if ( AR.GetFile == 2 ) fi = AR.hidefile;
1831 else fi = AR.infile;
1832 where = bi->start;
1833 ADD2POS(where,AS.OldOnFile[AR.CurExpr]);
1834 SetScratch(fi,&(where));
1835 stoppos = binfo->indexbuffer[thr->lastbracket].next;
1836 ADD2POS(stoppos,AS.OldOnFile[AR.CurExpr]);
1837 AN.ninterms = thr->firstterm;
1838/*
1839 Now we have to put the 'value' of the bracket in the
1840 Compress buffer.
1841*/
1842 ttco = AR.CompressBuffer;
1843 tt = binfo->bracketbuffer + bi->bracket;
1844 i = *tt;
1845 NCOPY(ttco,tt,i)
1846 AR.CompressPointer = ttco;
1847 term = AT.WorkPointer;
1848 while ( GetTerm(BHEAD term) ) {
1849 SeekScratch(fi,&where);
1850 AT.WorkPointer = term + *term;
1851 AN.IndDum = AM.IndDum;
1852 AR.CurDum = ReNumber(BHEAD term);
1853 if ( AC.SymChangeFlag ) MarkDirty(term,DIRTYSYMFLAG);
1854 if ( AN.ncmod ) {
1855 if ( ( AC.modmode & ALSOFUNARGS ) != 0 ) MarkDirty(term,DIRTYFLAG);
1856 else if ( AR.PolyFun ) PolyFunDirty(BHEAD term);
1857 }
1858 else if ( AC.PolyRatFunChanged ) PolyFunDirty(BHEAD term);
1859 if ( ( AR.PolyFunType == 2 ) && ( AC.PolyRatFunChanged == 0 )
1860 && ( e->status == LOCALEXPRESSION || e->status == GLOBALEXPRESSION ) ) {
1861 PolyFunClean(BHEAD term);
1862 }
1863 if ( ( AP.PreDebug & THREADSDEBUG ) != 0 ) {
1864 MLOCK(ErrorMessageLock);
1865 MesPrint("Thread %w executing term:");
1866 PrintTerm(term,"DoBrackets");
1867 MUNLOCK(ErrorMessageLock);
1868 }
1869 AT.WorkPointer = term + *term;
1870 if ( Generator(BHEAD term,0) ) {
1872 MLOCK(ErrorMessageLock);
1873 MesPrint("Error in processing one term in thread %d in module %d",identity,AC.CModule);
1874 MUNLOCK(ErrorMessageLock);
1875 Terminate(-1);
1876 }
1877 AN.ninterms++;
1878 SetScratch(fi,&(where));
1879 if ( ISGEPOS(where,stoppos) ) break;
1880 }
1881 AT.WorkPointer = term;
1882 thr->free = BUCKETCOMINGFREE;
1883 break;
1884 }
1885/*
1886 #] DOBRACKETS :
1887 #[ CLEARCLOCK :
1888
1889 The program only comes here after a .clear
1890*/
1891 case CLEARCLOCK:
1892/* LOCK(clearclocklock); */
1893 sumtimerinfo[identity] += TimeCPU(1);
1894 timerinfo[identity] = TimeCPU(0);
1895/* UNLOCK(clearclocklock); */
1896 break;
1897/*
1898 #] CLEARCLOCK :
1899 #[ MCTSEXPANDTREE :
1900*/
1901 case MCTSEXPANDTREE:
1902 AT.optimtimes = AB[0]->T.optimtimes;
1903 find_Horner_MCTS_expand_tree();
1904 break;
1905/*
1906 #] MCTSEXPANDTREE :
1907 #[ OPTIMIZEEXPRESSION :
1908*/
1909 case OPTIMIZEEXPRESSION:
1911 break;
1912/*
1913 #] OPTIMIZEEXPRESSION :
1914*/
1915 default:
1916 MLOCK(ErrorMessageLock);
1917 MesPrint("Illegal wakeup signal %d for thread %d",wakeupsignal,identity);
1918 MUNLOCK(ErrorMessageLock);
1919 Terminate(-1);
1920 break;
1921 }
1922 /* we need the following update in case we are using checkpoints. then we
1923 need to readjust the clocks when recovering using this information */
1924 timerinfo[identity] = TimeCPU(1);
1925 }
1926EndOfThread:;
1927/*
1928 This is the end of the thread. We cleanup and exit.
1929 If we are using flint, call the per-thread cleanup function. This keep valgrind happy.
1930*/
1931#ifdef WITHFLINT
1932 flint_final_cleanup_thread();
1933#endif
1934 FinalizeOneThread(identity);
1935 return(0);
1936ProcErr:
1937 Terminate(-1);
1938 return(0);
1939}
1940
1941/*
1942 #] RunThread :
1943 #[ RunSortBot :
1944*/
1952#ifdef WITHSORTBOTS
1953
1954void *RunSortBot(void *dummy)
1955{
1956 int identity, wakeupsignal, identityretv;
1957 ALLPRIVATES *B, *BB;
1958 DUMMYUSE(dummy);
1959 identity = SetIdentity(&identityretv);
1960 threadpointers[identity] = pthread_self();
1961 B = InitializeOneThread(identity);
1962 while ( ( wakeupsignal = SortBotWait(identity) ) > 0 ) {
1963 switch ( wakeupsignal ) {
1964/*
1965 #[ INISORTBOT :
1966*/
1967 case INISORTBOT:
1968 AR.CompressBuffer = AB[0]->R.CompressBuffer;
1969 AR.ComprTop = AB[0]->R.ComprTop;
1970 AR.CompressPointer = AB[0]->R.CompressPointer;
1971 AR.CurExpr = AB[0]->R.CurExpr;
1972 AR.PolyFun = AB[0]->R.PolyFun;
1973 AR.PolyFunInv = AB[0]->R.PolyFunInv;
1974 AR.PolyFunType = AB[0]->R.PolyFunType;
1975 AR.PolyFunExp = AB[0]->R.PolyFunExp;
1976 AR.PolyFunVar = AB[0]->R.PolyFunVar;
1977 AR.PolyFunPow = AB[0]->R.PolyFunPow;
1978 AR.SortType = AC.SortType;
1979 if ( AR.PolyFun == 0 ) { AT.SS->PolyFlag = 0; }
1980 else if ( AR.PolyFunType == 1 ) { AT.SS->PolyFlag = 1; }
1981 else if ( AR.PolyFunType == 2 ) {
1982 if ( AR.PolyFunExp == 2
1983 || AR.PolyFunExp == 3 ) AT.SS->PolyFlag = 1;
1984 else AT.SS->PolyFlag = 2;
1985 }
1986 AT.SS->PolyWise = 0;
1987 AN.ncmod = AC.ncmod;
1988 LOCK(AT.SB.MasterBlockLock[1]);
1989 BB = AB[AT.SortBotIn1];
1990 LOCK(BB->T.SB.MasterBlockLock[BB->T.SB.MasterNumBlocks]);
1991 BB = AB[AT.SortBotIn2];
1992 LOCK(BB->T.SB.MasterBlockLock[BB->T.SB.MasterNumBlocks]);
1993 AT.SB.FillBlock = 1;
1994 AT.SB.MasterFill[1] = AT.SB.MasterStart[1];
1995 SETBASEPOSITION(AN.theposition,0);
1996 // Reset the sortbot comparison count
1997 AT.SS->verbComparisons = 0;
1998 break;
1999/*
2000 #] INISORTBOT :
2001 #[ RUNSORTBOT :
2002*/
2003 case RUNSORTBOT:
2004 SortBotMerge(B);
2005 break;
2006/*
2007 #] RUNSORTBOT :
2008 #[ TERMINATETHREAD :
2009*/
2010 case TERMINATETHREAD:
2011 goto EndOfThread;
2012/*
2013 #] TERMINATETHREAD :
2014 #[ CLEARCLOCK :
2015
2016 The program only comes here after a .clear
2017*/
2018 case CLEARCLOCK:
2019/* LOCK(clearclocklock); */
2020 sumtimerinfo[identity] += TimeCPU(1);
2021 timerinfo[identity] = TimeCPU(0);
2022/* UNLOCK(clearclocklock); */
2023 break;
2024/*
2025 #] CLEARCLOCK :
2026*/
2027 default:
2028 MLOCK(ErrorMessageLock);
2029 MesPrint("Illegal wakeup signal %d for thread %d",wakeupsignal,identity);
2030 MUNLOCK(ErrorMessageLock);
2031 Terminate(-1);
2032 break;
2033 }
2034 }
2035EndOfThread:;
2036/*
2037 This is the end of the thread. We cleanup and exit.
2038 If we are using flint, call the per-thread cleanup function. This keep valgrind happy.
2039*/
2040#ifdef WITHFLINT
2041 flint_final_cleanup_thread();
2042#endif
2043 FinalizeOneThread(identity);
2044 return(0);
2045}
2046
2047#endif
2048
2049/*
2050 #] RunSortBot :
2051 #[ IAmAvailable :
2052*/
2064void IAmAvailable(int identity)
2065{
2066 int top;
2067 LOCK(availabilitylock);
2068 top = topofavailables;
2069 listofavailables[topofavailables++] = identity;
2070 if ( top == 0 ) {
2071 UNLOCK(availabilitylock);
2072 LOCK(wakeupmasterlock);
2073 wakeupmaster = identity;
2074 pthread_cond_signal(&wakeupmasterconditions);
2075 UNLOCK(wakeupmasterlock);
2076 }
2077 else {
2078 UNLOCK(availabilitylock);
2079 }
2080}
2081
2082/*
2083 #] IAmAvailable :
2084 #[ GetAvailableThread :
2085*/
2094int GetAvailableThread(void)
2095{
2096 int retval = -1;
2097 LOCK(availabilitylock);
2098 if ( topofavailables > 0 ) retval = listofavailables[--topofavailables];
2099 UNLOCK(availabilitylock);
2100 if ( retval >= 0 ) {
2101/*
2102 Make sure the thread is indeed waiting and not between
2103 saying that it is available and starting to wait.
2104*/
2105 LOCK(wakeuplocks[retval]);
2106 UNLOCK(wakeuplocks[retval]);
2107 }
2108 return(retval);
2109}
2110
2111/*
2112 #] GetAvailableThread :
2113 #[ ConditionalGetAvailableThread :
2114*/
2122int ConditionalGetAvailableThread(void)
2123{
2124 int retval = -1;
2125 if ( topofavailables > 0 ) {
2126 LOCK(availabilitylock);
2127 if ( topofavailables > 0 ) {
2128 retval = listofavailables[--topofavailables];
2129 }
2130 UNLOCK(availabilitylock);
2131 if ( retval >= 0 ) {
2132/*
2133 Make sure the thread is indeed waiting and not between
2134 saying that it is available and starting to wait.
2135*/
2136 LOCK(wakeuplocks[retval]);
2137 UNLOCK(wakeuplocks[retval]);
2138 }
2139 }
2140 return(retval);
2141}
2142
2143/*
2144 #] ConditionalGetAvailableThread :
2145 #[ GetThread :
2146*/
2156int GetThread(int identity)
2157{
2158 int retval = -1, j;
2159 LOCK(availabilitylock);
2160 for ( j = 0; j < topofavailables; j++ ) {
2161 if ( identity == listofavailables[j] ) break;
2162 }
2163 if ( j < topofavailables ) {
2164 --topofavailables;
2165 for ( ; j < topofavailables; j++ ) {
2166 listofavailables[j] = listofavailables[j+1];
2167 }
2168 retval = identity;
2169 }
2170 UNLOCK(availabilitylock);
2171 return(retval);
2172}
2173
2174/*
2175 #] GetThread :
2176 #[ ThreadWait :
2177*/
2187int ThreadWait(int identity)
2188{
2189 int retval, top, j;
2190 LOCK(wakeuplocks[identity]);
2191 LOCK(availabilitylock);
2192 top = topofavailables;
2193 for ( j = topofavailables; j > 0; j-- )
2194 listofavailables[j] = listofavailables[j-1];
2195 listofavailables[0] = identity;
2196 topofavailables++;
2197 if ( top == 0 || topofavailables == numberofworkers ) {
2198 UNLOCK(availabilitylock);
2199 LOCK(wakeupmasterlock);
2200 wakeupmaster = identity;
2201 pthread_cond_signal(&wakeupmasterconditions);
2202 UNLOCK(wakeupmasterlock);
2203 }
2204 else {
2205 UNLOCK(availabilitylock);
2206 }
2207 while ( wakeup[identity] == 0 ) {
2208 pthread_cond_wait(&(wakeupconditions[identity]),&(wakeuplocks[identity]));
2209 }
2210 retval = wakeup[identity];
2211 wakeup[identity] = 0;
2212 UNLOCK(wakeuplocks[identity]);
2213 return(retval);
2214}
2215
2216/*
2217 #] ThreadWait :
2218 #[ SortBotWait :
2219*/
2220
2221#ifdef WITHSORTBOTS
2231int SortBotWait(int identity)
2232{
2233 int retval;
2234 LOCK(wakeuplocks[identity]);
2235 LOCK(availabilitylock);
2236 topsortbotavailables++;
2237 if ( topsortbotavailables >= numberofsortbots ) {
2238 UNLOCK(availabilitylock);
2239 LOCK(wakeupsortbotlock);
2240 wakeupmaster = identity;
2241 pthread_cond_signal(&wakeupsortbotconditions);
2242 UNLOCK(wakeupsortbotlock);
2243 }
2244 else {
2245 UNLOCK(availabilitylock);
2246 }
2247 while ( wakeup[identity] == 0 ) {
2248 pthread_cond_wait(&(wakeupconditions[identity]),&(wakeuplocks[identity]));
2249 }
2250 retval = wakeup[identity];
2251 wakeup[identity] = 0;
2252 UNLOCK(wakeuplocks[identity]);
2253 return(retval);
2254}
2255
2256#endif
2257
2258/*
2259 #] SortBotWait :
2260 #[ ThreadClaimedBlock :
2261*/
2272int ThreadClaimedBlock(int identity)
2273{
2274 LOCK(availabilitylock);
2275 numberclaimed++;
2276 if ( numberclaimed >= numberofworkers ) {
2277 UNLOCK(availabilitylock);
2278 LOCK(wakeupmasterlock);
2279 wakeupmaster = identity;
2280 pthread_cond_signal(&wakeupmasterconditions);
2281 UNLOCK(wakeupmasterlock);
2282 }
2283 else {
2284 UNLOCK(availabilitylock);
2285 }
2286 return(0);
2287}
2288
2289/*
2290 #] ThreadClaimedBlock :
2291 #[ MasterWait :
2292*/
2300int MasterWait(void)
2301{
2302 int retval;
2303 LOCK(wakeupmasterlock);
2304 while ( wakeupmaster == 0 ) {
2305 pthread_cond_wait(&wakeupmasterconditions,&wakeupmasterlock);
2306 }
2307 retval = wakeupmaster;
2308 wakeupmaster = 0;
2309 UNLOCK(wakeupmasterlock);
2310 return(retval);
2311}
2312
2313/*
2314 #] MasterWait :
2315 #[ MasterWaitThread :
2316*/
2323int MasterWaitThread(int identity)
2324{
2325 int retval;
2326 LOCK(wakeupmasterthreadlocks[identity]);
2327 while ( wakeupmasterthread[identity] == 0 ) {
2328 pthread_cond_wait(&(wakeupmasterthreadconditions[identity])
2329 ,&(wakeupmasterthreadlocks[identity]));
2330 }
2331 retval = wakeupmasterthread[identity];
2332 wakeupmasterthread[identity] = 0;
2333 UNLOCK(wakeupmasterthreadlocks[identity]);
2334 return(retval);
2335}
2336
2337/*
2338 #] MasterWaitThread :
2339 #[ MasterWaitAll :
2340*/
2347void MasterWaitAll(void)
2348{
2349 LOCK(wakeupmasterlock);
2350 while ( topofavailables < numberofworkers ) {
2351 pthread_cond_wait(&wakeupmasterconditions,&wakeupmasterlock);
2352 }
2353 UNLOCK(wakeupmasterlock);
2354 return;
2355}
2356
2357/*
2358 #] MasterWaitAll :
2359 #[ MasterWaitAllSortBots :
2360*/
2361
2362#ifdef WITHSORTBOTS
2363
2369void MasterWaitAllSortBots(void)
2370{
2371 LOCK(wakeupsortbotlock);
2372 while ( topsortbotavailables < numberofsortbots ) {
2373 pthread_cond_wait(&wakeupsortbotconditions,&wakeupsortbotlock);
2374 }
2375 UNLOCK(wakeupsortbotlock);
2376 return;
2377}
2378
2379#endif
2380
2381/*
2382 #] MasterWaitAllSortBots :
2383 #[ MasterWaitAllBlocks :
2384*/
2391void MasterWaitAllBlocks(void)
2392{
2393 LOCK(wakeupmasterlock);
2394 while ( numberclaimed < numberofworkers ) {
2395 pthread_cond_wait(&wakeupmasterconditions,&wakeupmasterlock);
2396 }
2397 UNLOCK(wakeupmasterlock);
2398 return;
2399}
2400
2401/*
2402 #] MasterWaitAllBlocks :
2403 #[ WakeupThread :
2404*/
2413void WakeupThread(int identity, int signalnumber)
2414{
2415 if ( signalnumber == 0 ) {
2416 MLOCK(ErrorMessageLock);
2417 MesPrint("Illegal wakeup signal for thread %d",identity);
2418 MUNLOCK(ErrorMessageLock);
2419 Terminate(-1);
2420 }
2421 LOCK(wakeuplocks[identity]);
2422 wakeup[identity] = signalnumber;
2423 pthread_cond_signal(&(wakeupconditions[identity]));
2424 UNLOCK(wakeuplocks[identity]);
2425}
2426
2427/*
2428 #] WakeupThread :
2429 #[ WakeupMasterFromThread :
2430*/
2439void WakeupMasterFromThread(int identity, int signalnumber)
2440{
2441 if ( signalnumber == 0 ) {
2442 MLOCK(ErrorMessageLock);
2443 MesPrint("Illegal wakeup signal for master %d",identity);
2444 MUNLOCK(ErrorMessageLock);
2445 Terminate(-1);
2446 }
2447 LOCK(wakeupmasterthreadlocks[identity]);
2448 wakeupmasterthread[identity] = signalnumber;
2449 pthread_cond_signal(&(wakeupmasterthreadconditions[identity]));
2450 UNLOCK(wakeupmasterthreadlocks[identity]);
2451}
2452
2453/*
2454 #] WakeupMasterFromThread :
2455 #[ SendOneBucket :
2456*/
2462int SendOneBucket(int type)
2463{
2464 ALLPRIVATES *B0 = AB[0];
2465 THREADBUCKET *thr = 0;
2466 int j, k, id;
2467 for ( j = 0; j < numthreadbuckets; j++ ) {
2468 if ( threadbuckets[j]->free == BUCKETFILLED ) {
2469 thr = threadbuckets[j];
2470 for ( k = j+1; k < numthreadbuckets; k++ )
2471 threadbuckets[k-1] = threadbuckets[k];
2472 threadbuckets[numthreadbuckets-1] = thr;
2473 break;
2474 }
2475 }
2476 AN0.ninterms++;
2477 while ( ( id = GetAvailableThread() ) < 0 ) { MasterWait(); }
2478/*
2479 Prepare the thread. Give it the term and variables.
2480*/
2481 LoadOneThread(0,id,thr,0);
2482 thr->busy = BUCKETASSIGNED;
2483 thr->free = BUCKETINUSE;
2484 numberoffullbuckets--;
2485/*
2486 And signal the thread to run.
2487 Form now on we may only interfere with this bucket
2488 1: after it has been marked BUCKETCOMINGFREE
2489 2: when thr->busy == BUCKETDOINGTERM and then only when protected by
2490 thr->lock. This would be for load balancing.
2491*/
2492 WakeupThread(id,type);
2493/* AN0.ninterms += thr->ddterms; */
2494 return(0);
2495}
2496
2497/*
2498 #] SendOneBucket :
2499 #[ InParallelProcessor :
2500*/
2520int InParallelProcessor(void)
2521{
2522 GETIDENTITY
2523 int i, id, retval = 0, num = 0;
2524 EXPRESSIONS e;
2525 if ( numberofworkers >= 2 ) {
2526 SetWorkerFiles();
2527 for ( i = 0; i < NumExpressions; i++ ) {
2528 e = Expressions+i;
2529 if ( e->partodo <= 0 ) continue;
2530 if ( e->status == LOCALEXPRESSION || e->status == GLOBALEXPRESSION
2531 || e->status == UNHIDELEXPRESSION || e->status == UNHIDEGEXPRESSION
2532 || e->status == INTOHIDELEXPRESSION || e->status == INTOHIDEGEXPRESSION ) {
2533 }
2534 else {
2535 e->partodo = 0;
2536 continue;
2537 }
2538 if ( e->counter == 0 ) { /* Expression with zero terms */
2539 e->partodo = 0;
2540 continue;
2541 }
2542/*
2543 This expression should go to an idle worker
2544*/
2545 while ( ( id = GetAvailableThread() ) < 0 ) { MasterWait(); }
2546 LoadOneThread(0,id,0,-1);
2547 AB[id]->R.exprtodo = i;
2548 WakeupThread(id,DOONEEXPRESSION);
2549 num++;
2550 }
2551/*
2552 Now we have to wait for all workers to finish
2553*/
2554 if ( num > 0 ) MasterWaitAll();
2555
2556 if ( AC.CollectFun ) AR.DeferFlag = 0;
2557 }
2558 else {
2559 for ( i = 0; i < NumExpressions; i++ ) {
2560 Expressions[i].partodo = 0;
2561 }
2562 }
2563 return(retval);
2564}
2565
2566/*
2567 #] InParallelProcessor :
2568 #[ ThreadsProcessor :
2569*/
2594int ThreadsProcessor(EXPRESSIONS e, WORD LastExpression, WORD fromspectator)
2595{
2596 ALLPRIVATES *B0 = AB[0], *B = B0;
2597 int id, oldgzipCompress, endofinput = 0, j, still, k, defcount = 0, bra = 0, first = 1;
2598 LONG dd = 0, ddd, thrbufsiz, thrbufsiz0, thrbufsiz2, numbucket = 0, numpasses;
2599 LONG num, i;
2600 WORD *oldworkpointer = AT0.WorkPointer, *tt, *ttco = 0, *t1 = 0, ter, *tstop = 0, *t2;
2601 THREADBUCKET *thr = 0;
2602 FILEHANDLE *oldoutfile = AR0.outfile;
2603 GETTERM GetTermP = &GetTerm;
2604 POSITION eonfile = AS.OldOnFile[e-Expressions];
2605 numberoffullbuckets = 0;
2606/*
2607 Start up all threads. The lock needs to be around the whole loop
2608 to keep processes from terminating quickly and putting themselves
2609 in the list of available threads again.
2610*/
2611 AM.tracebackflag = 1;
2612
2613 AS.sLevel = AR0.sLevel;
2614 LOCK(availabilitylock);
2615 topofavailables = 0;
2616 for ( id = 1; id <= numberofworkers; id++ ) {
2617 WakeupThread(id,STARTNEWEXPRESSION);
2618 }
2619 UNLOCK(availabilitylock);
2620 NewSort(BHEAD0);
2621 AN0.ninterms = 1;
2622/*
2623 Now for redefine
2624*/
2625 if ( AC.numpfirstnum > 0 ) {
2626 for ( j = 0; j < AC.numpfirstnum; j++ ) {
2627 AC.inputnumbers[j] = -1;
2628 }
2629 }
2630 MasterWaitAll();
2631/*
2632 Determine a reasonable bucketsize.
2633 This is based on the value of AC.ThreadBucketSize and the number
2634 of terms. We want at least 5 buckets per worker at the moment.
2635 Some research should show whether this is reasonable.
2636
2637 The number of terms in the expression is in e->counter
2638*/
2639 thrbufsiz2 = thrbufsiz = AC.ThreadBucketSize-1;
2640 if ( ( e->counter / ( numberofworkers * 5 ) ) < thrbufsiz ) {
2641 thrbufsiz = e->counter / ( numberofworkers * 5 ) - 1;
2642 if ( thrbufsiz < 0 ) thrbufsiz = 0;
2643 }
2644 thrbufsiz0 = thrbufsiz;
2645 numpasses = 5; /* this is just for trying */
2646 thrbufsiz = thrbufsiz0 / (2 << numpasses);
2647/*
2648 Mark all buckets as free and take the first.
2649*/
2650 for ( j = 0; j < numthreadbuckets; j++ )
2651 threadbuckets[j]->free = BUCKETFREE;
2652 thr = threadbuckets[0];
2653/*
2654 #[ Whole brackets :
2655
2656 First we look whether we have to work with entire brackets
2657 This is the case when there is a non-NULL address in e->bracketinfo.
2658 Of course we shouldn't have interference from a collect or keep statement.
2659*/
2660#ifdef WHOLEBRACKETS
2661 if ( e->bracketinfo && AC.CollectFun == 0 && AR0.DeferFlag == 0 ) {
2662 FILEHANDLE *curfile;
2663 int didone = 0;
2664 LONG num, n;
2665 AN0.expr = e;
2666 for ( n = 0; n < e->bracketinfo->indexfill; n++ ) {
2667 num = TreatIndexEntry(B0,n);
2668 if ( num > 0 ) {
2669 didone = 1;
2670/*
2671 This bracket can be sent off.
2672 1: Look for an empty bucket
2673*/
2674ReTry:;
2675 for ( j = 0; j < numthreadbuckets; j++ ) {
2676 switch ( threadbuckets[j]->free ) {
2677 case BUCKETFREE:
2678 thr = threadbuckets[j];
2679 goto Found1;
2680 case BUCKETCOMINGFREE:
2681 thr = threadbuckets[j];
2682 thr->free = BUCKETFREE;
2683 for ( k = j+1; k < numthreadbuckets; k++ )
2684 threadbuckets[k-1] = threadbuckets[k];
2685 threadbuckets[numthreadbuckets-1] = thr;
2686 j--;
2687 break;
2688 default:
2689 break;
2690 }
2691 }
2692Found1:;
2693 if ( j < numthreadbuckets ) {
2694/*
2695 Found an empty bucket. Fill it.
2696*/
2697 thr->firstbracket = n;
2698 thr->lastbracket = n + num - 1;
2699 thr->type = BUCKETDOINGBRACKET;
2700 thr->free = BUCKETFILLED;
2701 thr->firstterm = AN0.ninterms;
2702 for ( j = n; j < n+num; j++ ) {
2703 AN0.ninterms += e->bracketinfo->indexbuffer[j].termsinbracket;
2704 }
2705 n += num-1;
2706 numberoffullbuckets++;
2707 if ( topofavailables > 0 ) {
2708 SendOneBucket(DOBRACKETS);
2709 }
2710 }
2711/*
2712 All buckets are in use.
2713 Look/wait for an idle worker. Give it a bucket.
2714 After that, retry for a bucket
2715*/
2716 else {
2717 while ( topofavailables <= 0 ) {
2718 MasterWait();
2719 }
2720 SendOneBucket(DOBRACKETS);
2721 goto ReTry;
2722 }
2723 }
2724 }
2725 if ( didone ) {
2726/*
2727 And now put the input back in the original position.
2728*/
2729 switch ( e->status ) {
2730 case UNHIDELEXPRESSION:
2731 case UNHIDEGEXPRESSION:
2732 case DROPHLEXPRESSION:
2733 case DROPHGEXPRESSION:
2734 case HIDDENLEXPRESSION:
2735 case HIDDENGEXPRESSION:
2736 curfile = AR0.hidefile;
2737 break;
2738 default:
2739 curfile = AR0.infile;
2740 break;
2741 }
2742 SetScratch(curfile,&eonfile);
2743 GetTerm(B0,AT0.WorkPointer);
2744/*
2745 Now we point the GetTerm that is used to the one that is selective
2746*/
2747 GetTermP = &GetTerm2;
2748/*
2749 Next wait till there is a bucket available and initialize thr to it.
2750*/
2751 for(;;) {
2752 for ( j = 0; j < numthreadbuckets; j++ ) {
2753 switch ( threadbuckets[j]->free ) {
2754 case BUCKETFREE:
2755 thr = threadbuckets[j];
2756 goto Found2;
2757 case BUCKETCOMINGFREE:
2758 thr = threadbuckets[j];
2759 thr->free = BUCKETFREE;
2760 for ( k = j+1; k < numthreadbuckets; k++ )
2761 threadbuckets[k-1] = threadbuckets[k];
2762 threadbuckets[numthreadbuckets-1] = thr;
2763 j--;
2764 break;
2765 default:
2766 break;
2767 }
2768 }
2769 while ( topofavailables <= 0 ) {
2770 MasterWait();
2771 }
2772 while ( topofavailables > 0 && numberoffullbuckets > 0 ) {
2773 SendOneBucket(DOBRACKETS);
2774 }
2775 }
2776Found2:;
2777 while ( numberoffullbuckets > 0 ) {
2778 while ( topofavailables <= 0 ) {
2779 MasterWait();
2780 }
2781 while ( topofavailables > 0 && numberoffullbuckets > 0 ) {
2782 SendOneBucket(DOBRACKETS);
2783 }
2784 }
2785/*
2786 Disable the 'warming up' with smaller buckets.
2787
2788 numpasses = 0;
2789 thrbufsiz = thrbufsiz0;
2790*/
2791 AN0.lastinindex = -1;
2792 }
2793 MasterWaitAll();
2794 }
2795#endif
2796/*
2797 #] Whole brackets :
2798
2799 Now the loop to start a bucket
2800*/
2801 for(;;) {
2802 if ( fromspectator ) {
2803 ter = GetFromSpectator(thr->threadbuffer,fromspectator-1);
2804 if ( ter == 0 ) fromspectator = 0;
2805 }
2806 else {
2807 ter = GetTermP(B0,thr->threadbuffer);
2808/*
2809 At this point we could check whether the input term is
2810 just an expression that resides in a scratch file.
2811 If this is the case we should store the current input info
2812 (file and buffer content) and redirect the input.
2813 At the end we can go back to where we were.
2814 There are two possibilities: we are in the same scratchfile
2815 as the main input and the other expression is still in the
2816 input buffer, or we have to do some reading. The reading is
2817 of course done in GetTerm.
2818
2819 How to set this up can also be studied in TestSub (in file proces.c)
2820 where it checks for EXPRESSION.
2821*/
2822 }
2823 if ( ter < 0 ) break;
2824 if ( ter == 0 ) { endofinput = 1; goto Finalize; }
2825 dd = AN0.deferskipped;
2826 if ( AR0.DeferFlag ) {
2827 defcount = 0;
2828 thr->deferbuffer[defcount++] = AR0.DefPosition;
2829 ttco = thr->compressbuffer; t1 = AR0.CompressBuffer; j = *t1;
2830 NCOPY(ttco,t1,j);
2831 }
2832 else if ( first && ( AC.CollectFun == 0 ) ) { /* Brackets ? */
2833 first = 0;
2834 t1 = tstop = thr->threadbuffer;
2835 tstop += *tstop; tstop -= ABS(tstop[-1]);
2836 t1++;
2837 while ( t1 < tstop ) {
2838 if ( t1[0] == HAAKJE ) { bra = 1; break; }
2839 t1 += t1[1];
2840 }
2841 t1 = thr->threadbuffer;
2842 }
2843/*
2844 Check whether we have a collect,function going. If so execute it.
2845*/
2846 if ( AC.CollectFun && *(thr->threadbuffer) < (AM.MaxTer/((LONG)sizeof(WORD))-10) ) {
2847 if ( ( dd = GetMoreTerms(thr->threadbuffer) ) < 0 ) {
2848 LowerSortLevel(); goto ProcErr;
2849 }
2850 }
2851/*
2852 Check whether we have a priority task:
2853*/
2854 if ( topofavailables > 0 && numberoffullbuckets > 0 ) SendOneBucket(LOWESTLEVELGENERATION);
2855/*
2856 Now put more terms in the bucket. Position tt after the first term
2857*/
2858 tt = thr->threadbuffer; tt += *tt;
2859 thr->totnum = 1;
2860 thr->usenum = 0;
2861/*
2862 Next we worry about the 'slow startup' in which we make the initial
2863 buckets smaller, so that we get all threads busy as soon as possible.
2864*/
2865 if ( numpasses > 0 ) {
2866 numbucket++;
2867 if ( numbucket >= numberofworkers ) {
2868 numbucket = 0;
2869 numpasses--;
2870 if ( numpasses == 0 ) thrbufsiz = thrbufsiz0;
2871 else thrbufsiz = thrbufsiz0 / (2 << numpasses);
2872 }
2873 thrbufsiz2 = thrbufsiz + thrbufsiz/5; /* for completing brackets */
2874 }
2875/*
2876 we have already 1+dd terms
2877*/
2878 while ( ( dd < thrbufsiz ) &&
2879 ( tt - thr->threadbuffer ) < ( thr->threadbuffersize - AM.MaxTer/((LONG)sizeof(WORD)) - 2 ) ) {
2880/*
2881 First check:
2882*/
2883 if ( topofavailables > 0 && numberoffullbuckets > 0 ) SendOneBucket(LOWESTLEVELGENERATION);
2884/*
2885 There is room in the bucket. Fill yet another term.
2886*/
2887 if ( GetTermP(B0,tt) == 0 ) { endofinput = 1; break; }
2888 dd++;
2889 thr->totnum++;
2890 dd += AN0.deferskipped;
2891 if ( AR0.DeferFlag ) {
2892 thr->deferbuffer[defcount++] = AR0.DefPosition;
2893 t1 = AR0.CompressBuffer; j = *t1;
2894 NCOPY(ttco,t1,j);
2895 }
2896 if ( AC.CollectFun && *tt < (AM.MaxTer/((LONG)sizeof(WORD))-10) ) {
2897 if ( ( ddd = GetMoreTerms(tt) ) < 0 ) {
2898 LowerSortLevel(); goto ProcErr;
2899 }
2900 dd += ddd;
2901 }
2902 t1 = tt;
2903 tt += *tt;
2904 }
2905/*
2906 Check whether there are regular brackets and if we have no DeferFlag
2907 and no collect, we try to add more terms till we finish the current
2908 bracket. We should however not overdo it. Let us say: up to 20%
2909 more terms are allowed.
2910*/
2911 if ( bra ) {
2912 tstop = t1 + *t1; tstop -= ABS(tstop[-1]);
2913 t2 = t1+1;
2914 while ( t2 < tstop ) {
2915 if ( t2[0] == HAAKJE ) { break; }
2916 t2 += t2[1];
2917 }
2918 if ( t2[0] == HAAKJE ) {
2919 t2 += t2[1]; num = t2 - t1;
2920 while ( ( dd < thrbufsiz2 ) &&
2921 ( tt - thr->threadbuffer ) < ( thr->threadbuffersize - AM.MaxTer - 2 ) ) {
2922/*
2923 First check:
2924*/
2925 if ( topofavailables > 0 && numberoffullbuckets > 0 ) SendOneBucket(LOWESTLEVELGENERATION);
2926/*
2927 There is room in the bucket. Fill yet another term.
2928*/
2929 if ( GetTermP(B0,tt) == 0 ) { endofinput = 1; break; }
2930/*
2931 Same bracket?
2932*/
2933 tstop = tt + *tt; tstop -= ABS(tstop[-1]);
2934 if ( tstop-tt < num ) { /* Different: abort */
2935 AR0.KeptInHold = 1;
2936 break;
2937 }
2938 for ( i = 1; i < num; i++ ) {
2939 if ( t1[i] != tt[i] ) break;
2940 }
2941 if ( i < num ) { /* Different: abort */
2942 AR0.KeptInHold = 1;
2943 break;
2944 }
2945/*
2946 Same bracket. We need this term.
2947*/
2948 dd++;
2949 thr->totnum++;
2950 tt += *tt;
2951 }
2952 }
2953 }
2954 thr->ddterms = dd; /* total number of terms including keep brackets */
2955 thr->firstterm = AN0.ninterms;
2956 AN0.ninterms += dd;
2957 *tt = 0; /* mark end of bucket */
2958 thr->free = BUCKETFILLED;
2959 thr->type = BUCKETDOINGTERMS;
2960 numberoffullbuckets++;
2961 if ( topofavailables <= 0 && endofinput == 0 ) {
2962/*
2963 Problem: topofavailables may already be > 0, but the
2964 thread has not yet gone into waiting. Can the signal get lost?
2965 How can we tell that a thread is waiting for a signal?
2966
2967 All threads are busy. Try to load up another bucket.
2968 In the future we could be more sophisticated.
2969 At the moment we load a complete bucket which could be
2970 1000 terms or even more.
2971 In principle it is better to keep a full bucket ready
2972 and check after each term we put in the next bucket. That
2973 way we don't waste time of the workers.
2974*/
2975 for ( j = 0; j < numthreadbuckets; j++ ) {
2976 switch ( threadbuckets[j]->free ) {
2977 case BUCKETFREE:
2978 thr = threadbuckets[j];
2979 if ( !endofinput ) goto NextBucket;
2980/*
2981 If we are at the end of the input we mark
2982 the free buckets in a special way. That way
2983 we don't keep running into them.
2984*/
2985 thr->free = BUCKETATEND;
2986 break;
2987 case BUCKETCOMINGFREE:
2988 thr = threadbuckets[j];
2989 thr->free = BUCKETFREE;
2990/*
2991 Bucket has just been finished.
2992 Put at the end of the list. We don't want
2993 an early bucket to wait to be treated last.
2994*/
2995 for ( k = j+1; k < numthreadbuckets; k++ )
2996 threadbuckets[k-1] = threadbuckets[k];
2997 threadbuckets[numthreadbuckets-1] = thr;
2998 j--; /* we have to redo the same number j. */
2999 break;
3000 default:
3001 break;
3002 }
3003 }
3004/*
3005 We have no free bucket or we are at the end.
3006 The only thing we can do now is wait for a worker to come free,
3007 provided there are still buckets to send.
3008*/
3009 }
3010/*
3011 Look for the next bucket to send. There is at least one full bucket!
3012*/
3013 for ( j = 0; j < numthreadbuckets; j++ ) {
3014 if ( threadbuckets[j]->free == BUCKETFILLED ) {
3015 thr = threadbuckets[j];
3016 for ( k = j+1; k < numthreadbuckets; k++ )
3017 threadbuckets[k-1] = threadbuckets[k];
3018 threadbuckets[numthreadbuckets-1] = thr;
3019 break;
3020 }
3021 }
3022/*
3023 Wait for a thread to become available
3024 The bucket we are going to use is in thr.
3025*/
3026DoBucket:;
3027 AN0.ninterms++;
3028 while ( ( id = GetAvailableThread() ) < 0 ) { MasterWait(); }
3029/*
3030 Prepare the thread. Give it the term and variables.
3031*/
3032 LoadOneThread(0,id,thr,0);
3033 LOCK(thr->lock);
3034 thr->busy = BUCKETASSIGNED;
3035 UNLOCK(thr->lock);
3036 thr->free = BUCKETINUSE;
3037 numberoffullbuckets--;
3038/*
3039 And signal the thread to run.
3040 Form now on we may only interfere with this bucket
3041 1: after it has been marked BUCKETCOMINGFREE
3042 2: when thr->busy == BUCKETDOINGTERM and then only when protected by
3043 thr->lock. This would be for load balancing.
3044*/
3045 WakeupThread(id,LOWESTLEVELGENERATION);
3046/* AN0.ninterms += thr->ddterms; */
3047/*
3048 Now look whether there is another bucket filled and a worker available
3049*/
3050 if ( topofavailables > 0 ) { /* there is a worker */
3051 for ( j = 0; j < numthreadbuckets; j++ ) {
3052 if ( threadbuckets[j]->free == BUCKETFILLED ) {
3053 thr = threadbuckets[j];
3054 for ( k = j+1; k < numthreadbuckets; k++ )
3055 threadbuckets[k-1] = threadbuckets[k];
3056 threadbuckets[numthreadbuckets-1] = thr;
3057 goto DoBucket; /* and we found a bucket */
3058 }
3059 }
3060/*
3061 no bucket is loaded but there is a thread available
3062 find a bucket to load. If there is none (all are USED or ATEND)
3063 we jump out of the loop.
3064*/
3065 for ( j = 0; j < numthreadbuckets; j++ ) {
3066 switch ( threadbuckets[j]->free ) {
3067 case BUCKETFREE:
3068 thr = threadbuckets[j];
3069 if ( !endofinput ) goto NextBucket;
3070 thr->free = BUCKETATEND;
3071 break;
3072 case BUCKETCOMINGFREE:
3073 thr = threadbuckets[j];
3074 if ( endofinput ) {
3075 thr->free = BUCKETATEND;
3076 }
3077 else {
3078 thr->free = BUCKETFREE;
3079 for ( k = j+1; k < numthreadbuckets; k++ )
3080 threadbuckets[k-1] = threadbuckets[k];
3081 threadbuckets[numthreadbuckets-1] = thr;
3082 j--;
3083 }
3084 break;
3085 default:
3086 break;
3087 }
3088 }
3089 if ( j >= numthreadbuckets ) break;
3090 }
3091 else {
3092/*
3093 No worker available.
3094 Look for a bucket to load.
3095 Its number will be in "still"
3096*/
3097Finalize:;
3098 still = -1;
3099 for ( j = 0; j < numthreadbuckets; j++ ) {
3100 switch ( threadbuckets[j]->free ) {
3101 case BUCKETFREE:
3102 thr = threadbuckets[j];
3103 if ( !endofinput ) goto NextBucket;
3104 thr->free = BUCKETATEND;
3105 break;
3106 case BUCKETCOMINGFREE:
3107 thr = threadbuckets[j];
3108 if ( endofinput ) thr->free = BUCKETATEND;
3109 else {
3110 thr->free = BUCKETFREE;
3111 for ( k = j+1; k < numthreadbuckets; k++ )
3112 threadbuckets[k-1] = threadbuckets[k];
3113 threadbuckets[numthreadbuckets-1] = thr;
3114 j--;
3115 }
3116 break;
3117 case BUCKETFILLED:
3118 if ( still < 0 ) still = j;
3119 break;
3120 default:
3121 break;
3122 }
3123 }
3124 if ( still < 0 ) {
3125/*
3126 No buckets to be executed and no buckets FREE.
3127 We must be at the end. Break out of the loop.
3128*/
3129 break;
3130 }
3131 thr = threadbuckets[still];
3132 for ( k = still+1; k < numthreadbuckets; k++ )
3133 threadbuckets[k-1] = threadbuckets[k];
3134 threadbuckets[numthreadbuckets-1] = thr;
3135 goto DoBucket;
3136 }
3137NextBucket:;
3138 }
3139/*
3140 Now the stage one load balancing.
3141 If the load has been readjusted we have again filled buckets.
3142 In that case we jump back in the loop.
3143
3144 Tricky point: when do the workers see the new value of AT.LoadBalancing?
3145 It should activate the locks on thr->busy
3146*/
3147 if ( AC.ThreadBalancing ) {
3148 for ( id = 1; id <= numberofworkers; id++ ) {
3149 AB[id]->T.LoadBalancing = 1;
3150 }
3151 if ( LoadReadjusted() ) goto Finalize;
3152 for ( id = 1; id <= numberofworkers; id++ ) {
3153 AB[id]->T.LoadBalancing = 0;
3154 }
3155 }
3156 if ( AC.ThreadBalancing ) {
3157/*
3158 The AS.Balancing flag should have Generator look for
3159 free workers and apply the "buro" method.
3160
3161 There is still a serious problem.
3162 When for instance a sum_, there may be space created in a local
3163 compiler buffer for a wildcard substitution or whatever.
3164 Compiler buffer execution scribble space.....
3165 This isn't copied along?
3166 Look up ebufnum. There are 12 places with AddRHS!
3167 Problem: one process allocates in ebuf. Then term is given to
3168 other process. It would like to use from this ebuf, but the sender
3169 finishes first and removes the ebuf (and/or overwrites it).
3170
3171 Other problem: local $ variables aren't copied along.
3172*/
3173 AS.Balancing = 0;
3174 }
3175 MasterWaitAll();
3176 AS.Balancing = 0;
3177/*
3178 When we deal with the last expression we can now remove the input
3179 scratch file. This saves potentially much disk space (up to 1/3)
3180*/
3181 if ( LastExpression ) {
3182 UpdateMaxSize();
3183 if ( AR0.infile->handle >= 0 ) {
3184 CloseFile(AR0.infile->handle);
3185 AR0.infile->handle = -1;
3186 remove(AR0.infile->name);
3187 PUTZERO(AR0.infile->POposition);
3188 AR0.infile->POfill = AR0.infile->POfull = AR0.infile->PObuffer;
3189 }
3190 }
3191/*
3192 We order the threads to finish in the MasterMerge routine
3193 It will start with waiting for all threads to finish.
3194 One could make an administration in which threads that have
3195 finished can start already with the final sort but
3196 1: The load balancing should not make this super urgent
3197 2: It would definitely not be very compatible with the second
3198 stage load balancing.
3199*/
3200 oldgzipCompress = AR0.gzipCompress;
3201 AR0.gzipCompress = 0;
3202 if ( AR0.outtohide ) AR0.outfile = AR0.hidefile;
3203 if ( MasterMerge() < 0 ) {
3204 if ( AR0.outtohide ) AR0.outfile = oldoutfile;
3205 AR0.gzipCompress = oldgzipCompress;
3206 goto ProcErr;
3207 }
3208 if ( AR0.outtohide ) AR0.outfile = oldoutfile;
3209 AR0.gzipCompress = oldgzipCompress;
3210/*
3211 Now wait for all threads to be ready to give them the cleaning up signal.
3212 With the new MasterMerge routine we can do the cleanup already automatically
3213 avoiding having to send these signals.
3214*/
3215 MasterWaitAll();
3216 AR0.sLevel--;
3217 for ( id = 1; id < AM.totalnumberofthreads; id++ ) {
3218 if ( GetThread(id) > 0 ) WakeupThread(id,CLEANUPEXPRESSION);
3219 }
3220 e->numdummies = 0;
3221 for ( id = 1; id < AM.totalnumberofthreads; id++ ) {
3222 if ( AB[id]->R.MaxDum - AM.IndDum > e->numdummies )
3223 e->numdummies = AB[id]->R.MaxDum - AM.IndDum;
3224 AR0.expchanged |= AB[id]->R.expchanged;
3225 }
3226/*
3227 And wait for all to be clean.
3228*/
3229 MasterWaitAll();
3230 AT0.WorkPointer = oldworkpointer;
3231 return(0);
3232ProcErr:;
3233 return(-1);
3234}
3235
3236/*
3237 #] ThreadsProcessor :
3238 #[ LoadReadjusted :
3239*/
3258int LoadReadjusted(void)
3259{
3260 ALLPRIVATES *B0 = AB[0];
3261 THREADBUCKET *thr = 0, *thrtogo = 0;
3262 int numtogo, numfree, numbusy, n, nperbucket, extra, i, j, u, bus;
3263 LONG numinput;
3264 WORD *t1, *c1, *t2, *c2, *t3;
3265/*
3266 Start with waiting for at least one free processor.
3267 We don't want the master competing for time when all are busy.
3268*/
3269 while ( topofavailables <= 0 ) MasterWait();
3270/*
3271 Now look for the fullest bucket and make a list of free buckets
3272 The bad part is that most numbers can change at any moment.
3273*/
3274restart:;
3275 numtogo = 0;
3276 numfree = 0;
3277 numbusy = 0;
3278 for ( j = 0; j < numthreadbuckets; j++ ) {
3279 thr = threadbuckets[j];
3280 if ( thr->free == BUCKETFREE || thr->free == BUCKETATEND
3281 || thr->free == BUCKETCOMINGFREE ) {
3282 freebuckets[numfree++] = thr;
3283 }
3284 else if ( thr->type != BUCKETDOINGTERMS ) {}
3285 else if ( thr->totnum > 1 ) { /* never steal from a bucket with one term */
3286 LOCK(thr->lock);
3287 bus = thr->busy;
3288 UNLOCK(thr->lock);
3289 if ( thr->free == BUCKETINUSE ) {
3290 n = thr->totnum-thr->usenum;
3291 if ( bus == BUCKETASSIGNED ) numbusy++;
3292 else if ( ( bus != BUCKETASSIGNED )
3293 && ( n > numtogo ) ) {
3294 numtogo = n;
3295 thrtogo = thr;
3296 }
3297 }
3298 else if ( bus == BUCKETTOBERELEASED
3299 && thr->free == BUCKETRELEASED ) {
3300 freebuckets[numfree++] = thr;
3301 thr->free = BUCKETATEND;
3302 LOCK(thr->lock);
3303 thr->busy = BUCKETPREPARINGTERM;
3304 UNLOCK(thr->lock);
3305 }
3306 }
3307 }
3308 if ( numfree == 0 ) return(0); /* serious problem */
3309 if ( numtogo > 0 ) { /* provisionally there is something to be stolen */
3310 thr = thrtogo;
3311/*
3312 If the number has changed there is good progress.
3313 Maybe there is another thread that needs assistance.
3314 We start all over.
3315*/
3316 if ( thr->totnum-thr->usenum < numtogo ) goto restart;
3317/*
3318 If the thread is in the term loading phase
3319 (thr->busy == BUCKETPREPARINGTERM) we better stay away from it.
3320 We wait now for the thread to be busy, and don't allow it
3321 now to drop out of this state till we are done here.
3322 This all depends on whether AT.LoadBalancing == 1 is seen by
3323 the thread.
3324*/
3325 LOCK(thr->lock);
3326 if ( thr->busy != BUCKETDOINGTERM ) {
3327 UNLOCK(thr->lock);
3328 goto restart;
3329 }
3330 if ( thr->totnum-thr->usenum < numtogo ) {
3331 UNLOCK(thr->lock);
3332 goto restart;
3333 }
3334 thr->free = BUCKETTERMINATED;
3335/*
3336 The above will signal the thread we want to terminate.
3337 Next all effort goes into making sure the landing is soft.
3338 Unfortunately we don't want to wait for a signal, because the thread
3339 may be working for a long time on a single term.
3340*/
3341 if ( thr->usenum == thr->totnum ) {
3342/*
3343 Terminated in the mean time or by now working on the
3344 last term. Try again.
3345*/
3346 thr->free = BUCKETATEND;
3347 UNLOCK(thr->lock);
3348 goto restart;
3349 }
3350 goto intercepted;
3351 }
3352/* This has always been commented. Indeed no lock is held here. */
3353/* UNLOCK(thr->lock); */
3354 if ( numbusy > 0 ) {
3355 /* JD: this avoids large runtimes for tform tests under valgrind.
3356 What seems to happen is we return from here, goto Finalize, and
3357 end up in LoadReadjusted again without the threads having a
3358 chance to update their busy status. Then we end up here again.
3359 Sleep the thread for, say, 1us to allow threads to aquire the lock. */
3360 struct timespec sleeptime;
3361 sleeptime.tv_sec = 0;
3362 sleeptime.tv_nsec = 1000L;
3363 nanosleep(&sleeptime, NULL);
3364 return(1); /* Wait a bit.... */
3365 }
3366 return(0);
3367intercepted:;
3368/*
3369 We intercepted one successfully. Now it becomes interesting. Action:
3370 1: determine how many terms per free bucket.
3371 2: find the first untreated term.
3372 3: put the terms in the free buckets.
3373
3374 Remember: we still have the lock to avoid interference from the thread
3375 that is being robbed. We were holding it and then jumped here with
3376 goto intercepted.
3377*/
3378 numinput = thr->firstterm + thr->usenum;
3379 nperbucket = numtogo / numfree;
3380 extra = numtogo - nperbucket*numfree;
3381 if ( AR0.DeferFlag ) {
3382 t1 = thr->threadbuffer; c1 = thr->compressbuffer; u = thr->usenum;
3383 for ( n = 0; n < thr->usenum; n++ ) { t1 += *t1; c1 += *c1; }
3384 t3 = t1;
3385 if ( extra > 0 ) {
3386 for ( i = 0; i < extra; i++ ) {
3387 thrtogo = freebuckets[i];
3388 t2 = thrtogo->threadbuffer;
3389 c2 = thrtogo->compressbuffer;
3390 thrtogo->free = BUCKETFILLED;
3391 thrtogo->type = BUCKETDOINGTERMS;
3392 thrtogo->totnum = nperbucket+1;
3393 thrtogo->ddterms = 0;
3394 thrtogo->usenum = 0;
3395 thrtogo->busy = BUCKETASSIGNED;
3396 thrtogo->firstterm = numinput;
3397 numinput += nperbucket+1;
3398 for ( n = 0; n <= nperbucket; n++ ) {
3399 j = *t1; NCOPY(t2,t1,j);
3400 j = *c1; NCOPY(c2,c1,j);
3401 thrtogo->deferbuffer[n] = thr->deferbuffer[u++];
3402 }
3403 *t2 = *c2 = 0;
3404 }
3405 }
3406 if ( nperbucket > 0 ) {
3407 for ( i = extra; i < numfree; i++ ) {
3408 thrtogo = freebuckets[i];
3409 t2 = thrtogo->threadbuffer;
3410 c2 = thrtogo->compressbuffer;
3411 thrtogo->free = BUCKETFILLED;
3412 thrtogo->type = BUCKETDOINGTERMS;
3413 thrtogo->totnum = nperbucket;
3414 thrtogo->ddterms = 0;
3415 thrtogo->usenum = 0;
3416 thrtogo->busy = BUCKETASSIGNED;
3417 thrtogo->firstterm = numinput;
3418 numinput += nperbucket;
3419 for ( n = 0; n < nperbucket; n++ ) {
3420 j = *t1; NCOPY(t2,t1,j);
3421 j = *c1; NCOPY(c2,c1,j);
3422 thrtogo->deferbuffer[n] = thr->deferbuffer[u++];
3423 }
3424 *t2 = *c2 = 0;
3425 }
3426 }
3427 }
3428 else {
3429 t1 = thr->threadbuffer;
3430 for ( n = 0; n < thr->usenum; n++ ) { t1 += *t1; }
3431 t3 = t1;
3432 if ( extra > 0 ) {
3433 for ( i = 0; i < extra; i++ ) {
3434 thrtogo = freebuckets[i];
3435 t2 = thrtogo->threadbuffer;
3436 thrtogo->free = BUCKETFILLED;
3437 thrtogo->type = BUCKETDOINGTERMS;
3438 thrtogo->totnum = nperbucket+1;
3439 thrtogo->ddterms = 0;
3440 thrtogo->usenum = 0;
3441 thrtogo->busy = BUCKETASSIGNED;
3442 thrtogo->firstterm = numinput;
3443 numinput += nperbucket+1;
3444 for ( n = 0; n <= nperbucket; n++ ) {
3445 j = *t1; NCOPY(t2,t1,j);
3446 }
3447 *t2 = 0;
3448 }
3449 }
3450 if ( nperbucket > 0 ) {
3451 for ( i = extra; i < numfree; i++ ) {
3452 thrtogo = freebuckets[i];
3453 t2 = thrtogo->threadbuffer;
3454 thrtogo->free = BUCKETFILLED;
3455 thrtogo->type = BUCKETDOINGTERMS;
3456 thrtogo->totnum = nperbucket;
3457 thrtogo->ddterms = 0;
3458 thrtogo->usenum = 0;
3459 thrtogo->busy = BUCKETASSIGNED;
3460 thrtogo->firstterm = numinput;
3461 numinput += nperbucket;
3462 for ( n = 0; n < nperbucket; n++ ) {
3463 j = *t1; NCOPY(t2,t1,j);
3464 }
3465 *t2 = 0;
3466 }
3467 }
3468 }
3469 *t3 = 0; /* This is some form of extra insurance */
3470 if ( thr->free == BUCKETRELEASED && thr->busy == BUCKETTOBERELEASED ) {
3471 thr->free = BUCKETATEND; thr->busy = BUCKETPREPARINGTERM;
3472 }
3473 UNLOCK(thr->lock);
3474 return(1);
3475}
3476
3477/*
3478 #] LoadReadjusted :
3479 #[ SortStrategy :
3480*/
3513/*
3514 #] SortStrategy :
3515 #[ PutToMaster :
3516*/
3539int PutToMaster(PHEAD WORD *term)
3540{
3541 int i,j,nexti,ret = 0;
3542 int urgent = 0;
3543 WORD *t, *fill, *top, zero = 0;
3544 if ( term == 0 ) { /* Mark the end of the expression */
3545 t = &zero; j = 1;
3546 }
3547 else {
3548 t = term; ret = j = *term;
3549 if ( j == 0 ) { j = 1; } /* Just in case there is a spurious end */
3550 }
3551 i = AT.SB.FillBlock; /* The block we are working at */
3552 fill = AT.SB.MasterFill[i]; /* Where we are filling */
3553 top = AT.SB.MasterStop[i]; /* End of the block */
3554
3555 // If there is space in the block, and we have already written MINWRITENUMBEROFTERMS,
3556 // determine if the reading thread is waiting for us by trying to lock the previous
3557 // block. If we manage to lock it, then we still have time to continue filling this
3558 // block. If we can't lock it, the reading thread is waiting for us and we should
3559 // move to the next block ASAP.
3560 if ( j < top - fill && AT.SB.BlockTerms[i] > MINWRITENUMBEROFTERMS ) {
3561 const int prev = ( i == 1 ? AT.SB.MasterNumBlocks : i-1 );
3562 if ( ! pthread_mutex_trylock(&(AT.SB.MasterBlockLock[prev])) ) {
3563 UNLOCK(AT.SB.MasterBlockLock[prev]);
3564 }
3565 else {
3566 urgent = 1;
3567 }
3568 }
3569
3570 // If the term doesn't fit in the current block, or a thread is waiting for us
3571 // (and we've already written at least MINWRITENUMBEROFTERMS), move to the next:
3572 if ( ( j >= top - fill ) || urgent ) {
3573 nexti = i+1;
3574 if ( nexti > AT.SB.MasterNumBlocks ) {
3575 nexti = 1;
3576 }
3577 LOCK(AT.SB.MasterBlockLock[nexti]);
3578 UNLOCK(AT.SB.MasterBlockLock[i]);
3579 AT.SB.MasterFill[i] = AT.SB.MasterStart[i];
3580 AT.SB.FillBlock = i = nexti;
3581 fill = AT.SB.MasterStart[i];
3582 top = AT.SB.MasterStop[i];
3583 if ( AT.SB.BlockTerms[i] != 0 ) {
3584 // In this case, there has been an accounting error in a previous use
3585 // of this block. Blocks that have been read from and unlocked, should
3586 // have BlockTerms == 0.
3587 MLOCK(ErrorMessageLock);
3588 MesPrint("Error in PutToMaster, starting a block with BlockTerms != 0");
3589 MUNLOCK(ErrorMessageLock);
3590 Terminate(-1);
3591 }
3592 }
3593
3594 NCOPY(fill, t, j);
3595 AT.SB.BlockTerms[i]++;
3596 AT.SB.MasterFill[i] = fill;
3597 return(ret);
3598}
3599
3600/*
3601 #] PutToMaster :
3602 #[ SortBotOut :
3603*/
3604
3605#ifdef WITHSORTBOTS
3606
3615int
3616SortBotOut(PHEAD WORD *term)
3617{
3618 WORD im;
3619
3620 if ( AT.identity != 0 ) return(PutToMaster(BHEAD term));
3621
3622 if ( term == 0 ) {
3623 if ( FlushOut(&SortBotPosition,AR.outfile,1) ) return(-1);
3624 ADDPOS(AT.SS->SizeInFile[0],1);
3625 return(0);
3626 }
3627 else {
3628 numberofterms++;
3629 if ( ( im = PutOut(BHEAD term,&SortBotPosition,AR.outfile,1) ) < 0 ) {
3630 MLOCK(ErrorMessageLock);
3631 MesPrint("Called from MasterMerge/SortBotOut");
3632 MUNLOCK(ErrorMessageLock);
3633 return(-1);
3634 }
3635 ADDPOS(AT.SS->SizeInFile[0],im);
3636 return(im);
3637 }
3638}
3639
3640#endif
3641
3642/*
3643 #] SortBotOut :
3644 #[ MasterMerge :
3645*/
3663int MasterMerge(void)
3664{
3665 ALLPRIVATES *B0 = AB[0], *B = 0;
3666 SORTING *S = AT0.SS;
3667 WORD **poin, **poin2, ul, k, i, im, *m1, j;
3668 WORD lpat, mpat, level, l1, l2, r1, r2, r3, c;
3669 WORD *m2, *m3, r31, r33, ki, *rr;
3670 UWORD *coef;
3671 POSITION position;
3672 FILEHANDLE *fin, *fout;
3673#ifdef WITHSORTBOTS
3674 if ( numberofworkers > 2 ) return(SortBotMasterMerge());
3675#endif
3676 fin = &S->file;
3677 if ( AR0.PolyFun == 0 ) { S->PolyFlag = 0; }
3678 else if ( AR0.PolyFunType == 1 ) { S->PolyFlag = 1; }
3679 else if ( AR0.PolyFunType == 2 ) {
3680 if ( AR0.PolyFunExp == 2
3681 || AR0.PolyFunExp == 3 ) S->PolyFlag = 1;
3682 else S->PolyFlag = 2;
3683 }
3684 S->TermsLeft = 0;
3685 coef = AN0.SoScratC;
3686 poin = S->poina; poin2 = S->poin2a;
3687 rr = AR0.CompressPointer;
3688 *rr = 0;
3689/*
3690 #[ Setup :
3691*/
3692 S->inNum = numberofthreads;
3693 fout = AR0.outfile;
3694/*
3695 Load the patches. The threads have to finish their sort first.
3696*/
3697 S->lPatch = S->inNum - 1;
3698/*
3699 Claim all zero blocks. We need them anyway.
3700 In principle the workers should never get into these.
3701 We also claim all last blocks. This is a safety procedure that
3702 should prevent the workers from working their way around the clock
3703 before the master gets started again.
3704*/
3705 AS.MasterSort = 1;
3706 numberclaimed = 0;
3707 for ( i = 1; i <= S->lPatch; i++ ) {
3708 B = AB[i];
3709 LOCK(AT.SB.MasterBlockLock[0]);
3710 LOCK(AT.SB.MasterBlockLock[AT.SB.MasterNumBlocks]);
3711 }
3712/*
3713 Now wake up the threads and have them start their final sorting.
3714 They should start with claiming their block and the master is
3715 not allowed to continue until that has been done.
3716 This waiting of the master will be done below in MasterWaitAllBlocks
3717*/
3718 for ( i = 0; i < S->lPatch; i++ ) {
3719 GetThread(i+1);
3720 WakeupThread(i+1,FINISHEXPRESSION);
3721 }
3722/*
3723 Prepare the output file.
3724*/
3725 if ( fout->handle >= 0 ) {
3726 PUTZERO(position);
3727 SeekFile(fout->handle,&position,SEEK_END);
3728 ADDPOS(position,((fout->POfill-fout->PObuffer)*sizeof(WORD)));
3729 }
3730 else {
3731 SETBASEPOSITION(position,(fout->POfill-fout->PObuffer)*sizeof(WORD));
3732 }
3733/*
3734 Wait for all threads to finish loading their first block.
3735*/
3736 MasterWaitAllBlocks();
3737/*
3738 Claim all first blocks.
3739 We don't release the last blocks.
3740 The strategy is that we always keep the previous block.
3741 In principle it looks like it isn't needed for the last block but
3742 actually it is to keep the front from overrunning the tail when writing.
3743*/
3744 for ( i = 1; i <= S->lPatch; i++ ) {
3745 B = AB[i];
3746 LOCK(AT.SB.MasterBlockLock[1]);
3747 AT.SB.MasterBlock = 1;
3748 }
3749/*
3750 #] Setup :
3751
3752 Now construct the tree:
3753*/
3754 lpat = 1;
3755 do { lpat <<= 1; } while ( lpat < S->lPatch );
3756 mpat = ( lpat >> 1 ) - 1;
3757 k = lpat - S->lPatch;
3758/*
3759 k is the number of empty places in the tree. they will
3760 be at the even positions from 2 to 2*k
3761*/
3762 for ( i = 1; i < lpat; i++ ) { S->tree[i] = -1; }
3763 for ( i = 1; i <= k; i++ ) {
3764 im = ( i * 2 ) - 1;
3765 poin[im] = AB[i]->T.SB.MasterStart[AB[i]->T.SB.MasterBlock];
3766 poin2[im] = poin[im] + *(poin[im]);
3767 S->used[i] = im;
3768 S->ktoi[im] = i-1;
3769 S->tree[mpat+i] = 0;
3770 poin[im-1] = poin2[im-1] = 0;
3771 }
3772 for ( i = (k*2)+1; i <= lpat; i++ ) {
3773 S->used[i-k] = i;
3774 S->ktoi[i] = i-k-1;
3775 poin[i] = AB[i-k]->T.SB.MasterStart[AB[i-k]->T.SB.MasterBlock];
3776 poin2[i] = poin[i] + *(poin[i]);
3777 }
3778/*
3779 the array poin tells the position of the i-th element of the S->tree
3780 'S->used' is a stack with the S->tree elements that need to be entered
3781 into the S->tree. at the beginning this is S->lPatch. during the
3782 sort there will be only very few elements.
3783 poin2 is the next value of poin. it has to be determined
3784 before the comparisons as the position or the size of the
3785 term indicated by poin may change.
3786 S->ktoi translates a S->tree element back to its stream number.
3787
3788 start the sort
3789*/
3790 level = S->lPatch;
3791/*
3792 introduce one term
3793*/
3794OneTerm:
3795 k = S->used[level];
3796 i = k + lpat - 1;
3797 if ( !*(poin[k]) ) {
3798 // Stream k has hit the end-of-stream "0". We still need to decrement
3799 // BlockTerms, which includes the marker in the count.
3800 ki = S->ktoi[k];
3801 AB[ki+1]->T.SB.BlockTerms[AB[ki+1]->T.SB.MasterBlock]--;
3802 do {
3803 if ( !( i >>= 1 ) ) {
3804 goto EndOfMerge;
3805 }
3806 } while ( !S->tree[i] );
3807 if ( S->tree[i] == -1 ) {
3808 S->tree[i] = 0;
3809 level--;
3810 goto OneTerm;
3811 }
3812 k = S->tree[i];
3813 S->used[level] = k;
3814 S->tree[i] = 0;
3815 }
3816/*
3817 move terms down the tree
3818*/
3819 while ( i >>= 1 ) {
3820 if ( S->tree[i] > 0 ) {
3821 if ( ( c = CompareTerms(B0, poin[S->tree[i]],poin[k],(WORD)0) ) > 0 ) {
3822/*
3823 S->tree[i] is the smaller. Exchange and go on.
3824*/
3825 S->used[level] = S->tree[i];
3826 S->tree[i] = k;
3827 k = S->used[level];
3828 }
3829 else if ( !c ) { /* Terms are equal */
3830/*
3831 S->TermsLeft--;
3832 Here the terms are equal and their coefficients
3833 have to be added.
3834*/
3835 l1 = *( m1 = poin[S->tree[i]] );
3836 l2 = *( m2 = poin[k] );
3837 if ( S->PolyWise ) { /* Here we work with PolyFun */
3838 WORD *tt1, *w;
3839 tt1 = m1;
3840 m1 += S->PolyWise;
3841 m2 += S->PolyWise;
3842 if ( S->PolyFlag == 2 ) {
3843 w = poly_ratfun_add(B0,m1,m2);
3844 if ( *tt1 + w[1] - m1[1] > AM.MaxTer/((LONG)sizeof(WORD)) ) {
3845 MLOCK(ErrorMessageLock);
3846 MesPrint("Term too complex in PolyRatFun addition. MaxTermSize of %10l is too small",AM.MaxTer);
3847 MUNLOCK(ErrorMessageLock);
3848 Terminate(-1);
3849 }
3850 AT0.WorkPointer = w;
3851 if ( w[FUNHEAD] == -SNUMBER && w[FUNHEAD+1] == 0 && w[1] > FUNHEAD ) {
3852 goto cancelled;
3853 }
3854 }
3855 else {
3856 w = AT0.WorkPointer;
3857 if ( w + m1[1] + m2[1] > AT0.WorkTop ) {
3858 MLOCK(ErrorMessageLock);
3859 MesPrint("MasterMerge: A WorkSpace of %10l is too small",AM.WorkSize);
3860 MUNLOCK(ErrorMessageLock);
3861 Terminate(-1);
3862 }
3863 AddArgs(B0,m1,m2,w);
3864 }
3865 r1 = w[1];
3866 if ( r1 <= FUNHEAD
3867 || ( w[FUNHEAD] == -SNUMBER && w[FUNHEAD+1] == 0 ) )
3868 { goto cancelled; }
3869 if ( r1 == m1[1] ) {
3870 NCOPY(m1,w,r1);
3871 }
3872 else if ( r1 < m1[1] ) {
3873 r2 = m1[1] - r1;
3874 m2 = w + r1;
3875 m1 += m1[1];
3876 while ( --r1 >= 0 ) *--m1 = *--m2;
3877 m2 = m1 - r2;
3878 r1 = S->PolyWise;
3879 while ( --r1 >= 0 ) *--m1 = *--m2;
3880 *m1 -= r2;
3881 poin[S->tree[i]] = m1;
3882 }
3883 else {
3884 // Here we are writing the new merged term *before* the original start of term1.
3885 // We can always do this, since before term1 there is previous term data of this
3886 // block, or the previous block, for which we are holding a lock. This requires
3887 // the existence of "block 0", if term1 is the first term of block 1!
3888 // It also requires the blocks to be contiguous in memory; we can't allocate
3889 // separate memory regions for each block without larger-scale changes.
3890 r2 = r1 - m1[1];
3891 m2 = tt1 - r2;
3892 r1 = S->PolyWise;
3893 m1 = tt1;
3894 *m1 += r2;
3895 poin[S->tree[i]] = m2;
3896 NCOPY(m2,m1,r1);
3897 r1 = w[1];
3898 NCOPY(m2,w,r1);
3899 }
3900 }
3901#ifdef WITHFLOAT
3902 else if ( AT.SortFloatMode ) {
3903 WORD *term1, *term2;
3904 term1 = poin[S->tree[i]];
3905 term2 = poin[k];
3906 if ( MergeWithFloat(B0, &term1,&term2) == 0 )
3907 goto cancelled;
3908 poin[S->tree[i]] = term1;
3909 }
3910#endif
3911 else {
3912 r1 = *( m1 += l1 - 1 );
3913 m1 -= ABS(r1) - 1;
3914 r1 = ( ( r1 > 0 ) ? (r1-1) : (r1+1) ) >> 1;
3915 r2 = *( m2 += l2 - 1 );
3916 m2 -= ABS(r2) - 1;
3917 r2 = ( ( r2 > 0 ) ? (r2-1) : (r2+1) ) >> 1;
3918
3919 if ( AddRat(B0,(UWORD *)m1,r1,(UWORD *)m2,r2,coef,&r3) ) {
3920 MLOCK(ErrorMessageLock);
3921 MesCall("MasterMerge");
3922 MUNLOCK(ErrorMessageLock);
3923 SETERROR(-1)
3924 }
3925
3926 if ( AN.ncmod != 0 ) {
3927 if ( ( AC.modmode & POSNEG ) != 0 ) {
3928 NormalModulus(coef,&r3);
3929 }
3930 else if ( BigLong(coef,r3,(UWORD *)AC.cmod,ABS(AN.ncmod)) >= 0 ) {
3931 WORD ii;
3932 SubPLon(coef,r3,(UWORD *)AC.cmod,ABS(AN.ncmod),coef,&r3);
3933 coef[r3] = 1;
3934 for ( ii = 1; ii < r3; ii++ ) coef[r3+ii] = 0;
3935 }
3936 }
3937 r3 *= 2;
3938 r33 = ( r3 > 0 ) ? ( r3 + 1 ) : ( r3 - 1 );
3939 if ( r3 < 0 ) r3 = -r3;
3940 if ( r1 < 0 ) r1 = -r1;
3941 r1 *= 2;
3942 r31 = r3 - r1;
3943 if ( !r3 ) { /* Terms cancel */
3944cancelled:
3945 ul = S->used[level] = S->tree[i];
3946 S->tree[i] = -1;
3947/*
3948 We skip to the next term in stream ul
3949*/
3950 im = *poin2[ul];
3951 poin[ul] = poin2[ul];
3952 ki = S->ktoi[ul];
3953 AB[ki+1]->T.SB.BlockTerms[AB[ki+1]->T.SB.MasterBlock]--;
3954 if ( AB[ki+1]->T.SB.BlockTerms[AB[ki+1]->T.SB.MasterBlock] == 0 ) {
3955/*
3956 We made it to the end of the block. We have to
3957 release the previous block and claim the next.
3958*/
3959 B = AB[ki+1];
3960 i = AT.SB.MasterBlock;
3961 if ( i == 1 ) {
3962 UNLOCK(AT.SB.MasterBlockLock[AT.SB.MasterNumBlocks]);
3963 }
3964 else {
3965 UNLOCK(AT.SB.MasterBlockLock[i-1]);
3966 }
3967 if ( i == AT.SB.MasterNumBlocks ) {
3968 i = 1;
3969 }
3970 else { i++; }
3971 LOCK(AT.SB.MasterBlockLock[i]);
3972 AT.SB.MasterBlock = i;
3973 poin[ul] = AT.SB.MasterStart[i];
3974 im = *poin[ul];
3975 poin2[ul] = poin[ul] + im;
3976 }
3977 else {
3978 poin2[ul] += im;
3979 }
3980 S->used[++level] = k;
3981 }
3982 else if ( !r31 ) { /* copy coef into term1 */
3983 goto CopCof2;
3984 }
3985 else if ( r31 < 0 ) { /* copy coef into term1
3986 and adjust the length of term1 */
3987 goto CopCoef;
3988 }
3989 else {
3990/*
3991 this is the dreaded calamity.
3992 is there enough space?
3993*/
3994 if( (poin[S->tree[i]]+l1+r31) >= poin2[S->tree[i]] ) {
3995/*
3996 no space! now the special trick for which
3997 we left 2*maxlng spaces open at the beginning
3998 of each patch.
3999*/
4000 if ( (l1 + r31)*((LONG)sizeof(WORD)) >= AM.MaxTer ) {
4001 MLOCK(ErrorMessageLock);
4002 MesPrint("MasterMerge: Coefficient overflow during sort");
4003 MUNLOCK(ErrorMessageLock);
4004 goto ReturnError;
4005 }
4006 m2 = poin[S->tree[i]];
4007 m3 = ( poin[S->tree[i]] -= r31 );
4008 do { *m3++ = *m2++; } while ( m2 < m1 );
4009 m1 = m3;
4010 }
4011CopCoef:
4012 *(poin[S->tree[i]]) += r31;
4013CopCof2:
4014 m2 = (WORD *)coef; im = r3;
4015 NCOPY(m1,m2,im);
4016 *m1 = r33;
4017 }
4018 }
4019/*
4020 Now skip to the next term in stream k.
4021*/
4022NextTerm:
4023 im = poin2[k][0];
4024 poin[k] = poin2[k];
4025 ki = S->ktoi[k];
4026 AB[ki+1]->T.SB.BlockTerms[AB[ki+1]->T.SB.MasterBlock]--;
4027 if ( AB[ki+1]->T.SB.BlockTerms[AB[ki+1]->T.SB.MasterBlock] == 0 ) {
4028/*
4029 We made it to the end of the block. We have to
4030 release the previous block and claim the next.
4031*/
4032 B = AB[ki+1];
4033 i = AT.SB.MasterBlock;
4034 if ( i == 1 ) {
4035 UNLOCK(AT.SB.MasterBlockLock[AT.SB.MasterNumBlocks]);
4036 }
4037 else {
4038 UNLOCK(AT.SB.MasterBlockLock[i-1]);
4039 }
4040 if ( i == AT.SB.MasterNumBlocks ) {
4041 i = 1;
4042 }
4043 else { i++; }
4044 LOCK(AT.SB.MasterBlockLock[i]);
4045 AT.SB.MasterBlock = i;
4046 poin[k] = AT.SB.MasterStart[i];
4047 im = *poin[k];
4048 poin2[k] = poin[k] + im;
4049 }
4050 else {
4051 poin2[k] += im;
4052 }
4053 goto OneTerm;
4054 }
4055 }
4056 else if ( S->tree[i] < 0 ) {
4057 S->tree[i] = k;
4058 level--;
4059 goto OneTerm;
4060 }
4061 }
4062/*
4063 found the smallest in the set. indicated by k.
4064 write to its destination.
4065*/
4066 S->TermsLeft++;
4067 if ( ( im = PutOut(B0,poin[k],&position,fout,1) ) < 0 ) {
4068 MLOCK(ErrorMessageLock);
4069 MesPrint("Called from MasterMerge with k = %d (stream %d)",k,S->ktoi[k]);
4070 MUNLOCK(ErrorMessageLock);
4071 goto ReturnError;
4072 }
4073 ADDPOS(S->SizeInFile[0],im);
4074 goto NextTerm;
4075EndOfMerge:
4076 if ( FlushOut(&position,fout,1) ) goto ReturnError;
4077 ADDPOS(S->SizeInFile[0],1);
4078 CloseFile(fin->handle);
4079 remove(fin->name);
4080 fin->handle = -1;
4081 position = S->SizeInFile[0];
4082 MULPOS(position,sizeof(WORD));
4083
4084 // Collect global sort statistics information from the threads.
4085 // The total GenTerms is the sum of the thread GenTerms.
4086 // The total small/large buffer sort info is the sum of the thread info.
4087 // The total comparison count is the sum of the thread counts.
4088 // The total unsorted size is the sum of the total generated terms sizes
4089 S->GenTerms = 0;
4090 for ( j = 1; j <= numberofworkers; j++ ) {
4091 S->GenTerms += AB[j]->T.SS->GenTerms;
4092 S->verbComparisons += AB[j]->T.SS->verbComparisons;
4093 S->verbSBsortTerms += AB[j]->T.SS->verbSBsortTerms;
4094 S->verbSBsortCap += AB[j]->T.SS->verbSBsortCap;
4095 S->verbLBsortPatches += AB[j]->T.SS->verbLBsortPatches;
4096 S->verbLBsortCap += AB[j]->T.SS->verbLBsortCap;
4097 S->verbUnsortedSize += AB[j]->T.SS->verbUnsortedSize;
4098 }
4099
4100 WriteStats(&position,STATSPOSTSORT,NOCHECKLOGTYPE);
4101 Expressions[AR0.CurExpr].counter = S->TermsLeft;
4102 Expressions[AR0.CurExpr].size = position;
4103/*
4104 Release all locks
4105*/
4106 for ( i = 1; i <= S->lPatch; i++ ) {
4107 B = AB[i];
4108 UNLOCK(AT.SB.MasterBlockLock[0]);
4109 if ( AT.SB.MasterBlock == 1 ) {
4110 UNLOCK(AT.SB.MasterBlockLock[AT.SB.MasterNumBlocks]);
4111 }
4112 else {
4113 UNLOCK(AT.SB.MasterBlockLock[AT.SB.MasterBlock-1]);
4114 }
4115 UNLOCK(AT.SB.MasterBlockLock[AT.SB.MasterBlock]);
4116 }
4117 AS.MasterSort = 0;
4118 return(0);
4119ReturnError:
4120 for ( i = 1; i <= S->lPatch; i++ ) {
4121 B = AB[i];
4122 UNLOCK(AT.SB.MasterBlockLock[0]);
4123 if ( AT.SB.MasterBlock == 1 ) {
4124 UNLOCK(AT.SB.MasterBlockLock[AT.SB.MasterNumBlocks]);
4125 }
4126 else {
4127 UNLOCK(AT.SB.MasterBlockLock[AT.SB.MasterBlock-1]);
4128 }
4129 UNLOCK(AT.SB.MasterBlockLock[AT.SB.MasterBlock]);
4130 }
4131 AS.MasterSort = 0;
4132 return(-1);
4133}
4134
4135/*
4136 #] MasterMerge :
4137 #[ SortBotMasterMerge :
4138*/
4139
4140#ifdef WITHSORTBOTS
4141
4157int SortBotMasterMerge(void)
4158{
4159 FILEHANDLE *fin, *fout;
4160 ALLPRIVATES *B = AB[0], *BB;
4161 POSITION position;
4162 SORTING *S = AT.SS;
4163 int i, j;
4164/*
4165 Get the sortbots get to claim their writing blocks.
4166 We have to wait till all have been claimed because they also have to
4167 claim the last writing blocks of the workers to prevent the head of
4168 the circular buffer to overrun the tail.
4169
4170 Before waiting we can do some needed initializations.
4171 Also the master has to claim the last writing blocks of its input.
4172*/
4173 topsortbotavailables = 0;
4174 for ( i = numberofworkers+1; i <= numberofworkers+numberofsortbots; i++ ) {
4175 WakeupThread(i,INISORTBOT);
4176 }
4177
4178 AS.MasterSort = 1;
4179 fout = AR.outfile;
4180 numberofterms = 0;
4181 AR.CompressPointer[0] = 0;
4182 numberclaimed = 0;
4183 BB = AB[AT.SortBotIn1];
4184 LOCK(BB->T.SB.MasterBlockLock[BB->T.SB.MasterNumBlocks]);
4185 BB = AB[AT.SortBotIn2];
4186 LOCK(BB->T.SB.MasterBlockLock[BB->T.SB.MasterNumBlocks]);
4187
4188 MasterWaitAllSortBots();
4189/*
4190 Now we can start up the workers. They will claim their writing blocks.
4191 Here the master will wait till all writing blocks have been claimed.
4192*/
4193 for ( i = 1; i <= numberofworkers; i++ ) {
4194 j = GetThread(i);
4195 WakeupThread(i,FINISHEXPRESSION);
4196 }
4197/*
4198 Prepare the output file in the mean time.
4199*/
4200 if ( fout->handle >= 0 ) {
4201 PUTZERO(SortBotPosition);
4202 SeekFile(fout->handle,&SortBotPosition,SEEK_END);
4203 ADDPOS(SortBotPosition,((fout->POfill-fout->PObuffer)*sizeof(WORD)));
4204 }
4205 else {
4206 SETBASEPOSITION(SortBotPosition,(fout->POfill-fout->PObuffer)*sizeof(WORD));
4207 }
4208 MasterWaitAllBlocks();
4209/*
4210 Now we can start the sortbots after which the master goes in
4211 sortbot mode to do its part of the job (the very final merge and
4212 the writing to output file).
4213*/
4214 topsortbotavailables = 0;
4215 for ( i = numberofworkers+1; i <= numberofworkers+numberofsortbots; i++ ) {
4216 WakeupThread(i,RUNSORTBOT);
4217 }
4218 if ( SortBotMerge(BHEAD0) ) {
4219 MLOCK(ErrorMessageLock);
4220 MesPrint("Called from SortBotMasterMerge");
4221 MUNLOCK(ErrorMessageLock);
4222 AS.MasterSort = 0;
4223 return(-1);
4224 }
4225/*
4226 And next the cleanup
4227*/
4228 if ( S->file.handle >= 0 )
4229 {
4230 fin = &S->file;
4231 CloseFile(fin->handle);
4232 remove(fin->name);
4233 fin->handle = -1;
4234 }
4235 position = S->SizeInFile[0];
4236 MULPOS(position,sizeof(WORD));
4237
4238 // Collect global sort statistics information from the threads.
4239 // The total GenTerms is the sum of the thread GenTerms.
4240 // The total small/large buffer sort info is the sum of the thread info.
4241 // The total comparison count is the sum of the thread and sortbot counts.
4242 // The total unsorted size is the sum of the total generated terms sizes
4243 S->GenTerms = 0;
4244 for ( j = 1; j <= numberofworkers; j++ ) {
4245 S->GenTerms += AB[j]->T.SS->GenTerms;
4246 S->verbComparisons += AB[j]->T.SS->verbComparisons;
4247 S->verbSBsortTerms += AB[j]->T.SS->verbSBsortTerms;
4248 S->verbSBsortCap += AB[j]->T.SS->verbSBsortCap;
4249 S->verbLBsortPatches += AB[j]->T.SS->verbLBsortPatches;
4250 S->verbLBsortCap += AB[j]->T.SS->verbLBsortCap;
4251 S->verbUnsortedSize += AB[j]->T.SS->verbUnsortedSize;
4252 }
4253 for ( j = numberofworkers+1; j <= numberofworkers+numberofsortbots; j++ ) {
4254 S->verbComparisons += AB[j]->T.SS->verbComparisons;
4255 }
4256
4257 S->TermsLeft = numberofterms;
4258 WriteStats(&position,STATSPOSTSORT,NOCHECKLOGTYPE);
4259 Expressions[AR.CurExpr].counter = S->TermsLeft;
4260 Expressions[AR.CurExpr].size = position;
4261 AS.MasterSort = 0;
4262/*
4263 The next statement is to prevent one of the sortbots not having
4264 completely cleaned up before the next module starts.
4265 If this statement is omitted every once in a while one of the sortbots
4266 is still running when the next expression starts and misses its
4267 initialization. The result is usually disastrous.
4268*/
4269 MasterWaitAllSortBots();
4270
4271 return(0);
4272}
4273
4274#endif
4275
4276/*
4277 #] SortBotMasterMerge :
4278 #[ SortBotMerge :
4279*/
4280
4281#ifdef WITHSORTBOTS
4282
4288int SortBotMerge(PHEAD0)
4289{
4290 GETBIDENTITY
4291 ALLPRIVATES *Bin1 = AB[AT.SortBotIn1],*Bin2 = AB[AT.SortBotIn2];
4292 WORD *term1, *term2, *wp;
4293 int blin1, blin2; /* Current block numbers */
4294 int error = 0;
4295 WORD l1, l2, *m1, *m2, *w, r1, r2, r3, r33, r31, *tt1, ii;
4296 WORD *to, *from, im, c;
4297 UWORD *coef;
4298 SORTING *S = AT.SS;
4299#ifdef WITHFLOAT
4300 WORD *fun1, *fun2, *fun3, *tstop1, *tstop2, size1, size2, size3, l3, jj, *m3;
4301#endif
4302/*
4303 Set the pointers to the input terms and the output space
4304*/
4305 coef = AN.SoScratC;
4306 blin1 = 1;
4307 blin2 = 1;
4308 if ( AT.identity == 0 ) {
4309 wp = AT.WorkPointer;
4310 }
4311 else {
4312 wp = AT.WorkPointer = AT.WorkSpace;
4313 }
4314/*
4315 Get the locks for reading the input
4316 This means that we can start once these locks have been cleared
4317 which means that there will be input.
4318*/
4319 LOCK(Bin1->T.SB.MasterBlockLock[blin1]);
4320 LOCK(Bin2->T.SB.MasterBlockLock[blin2]);
4321
4322 term1 = Bin1->T.SB.MasterStart[blin1];
4323 term2 = Bin2->T.SB.MasterStart[blin2];
4324 AT.SB.FillBlock = 1;
4325/*
4326 Now the main loop. Keep going until one of the two hits the end.
4327*/
4328 while ( *term1 && *term2 ) {
4329 if ( ( c = CompareTerms(BHEAD term1,term2,(WORD)0) ) > 0 ) {
4330/*
4331 #[ One is smallest :
4332*/
4333 Bin1->T.SB.BlockTerms[blin1]--;
4334 if ( SortBotOut(BHEAD term1) < 0 ) {
4335 MLOCK(ErrorMessageLock);
4336 MesPrint("Called from SortBotMerge with thread = %d",AT.identity);
4337 MUNLOCK(ErrorMessageLock);
4338 error = -1;
4339 goto ReturnError;
4340 }
4341 term1 += *term1;
4342 if ( Bin1->T.SB.BlockTerms[blin1] == 0 ) {
4343 if ( blin1 == 1 ) {
4344 UNLOCK(Bin1->T.SB.MasterBlockLock[Bin1->T.SB.MasterNumBlocks]);
4345 }
4346 else {
4347 UNLOCK(Bin1->T.SB.MasterBlockLock[blin1-1]);
4348 }
4349 if ( blin1 == Bin1->T.SB.MasterNumBlocks ) {
4350 blin1 = 1;
4351 }
4352 else {
4353 blin1++;
4354 }
4355 LOCK(Bin1->T.SB.MasterBlockLock[blin1]);
4356 Bin1->T.SB.MasterBlock = blin1;
4357 term1 = Bin1->T.SB.MasterStart[blin1];
4358 }
4359/*
4360 #] One is smallest :
4361*/
4362 }
4363 else if ( c < 0 ) {
4364/*
4365 #[ Two is smallest :
4366*/
4367 Bin2->T.SB.BlockTerms[blin2]--;
4368 if ( SortBotOut(BHEAD term2) < 0 ) {
4369 MLOCK(ErrorMessageLock);
4370 MesPrint("Called from SortBotMerge with thread = %d",AT.identity);
4371 MUNLOCK(ErrorMessageLock);
4372 error = -1;
4373 goto ReturnError;
4374 }
4375next2:
4376 term2 += *term2;
4377 if ( Bin2->T.SB.BlockTerms[blin2] == 0 ) {
4378 if ( blin2 == 1 ) {
4379 UNLOCK(Bin2->T.SB.MasterBlockLock[Bin2->T.SB.MasterNumBlocks]);
4380 }
4381 else {
4382 UNLOCK(Bin2->T.SB.MasterBlockLock[blin2-1]);
4383 }
4384 if ( blin2 == Bin2->T.SB.MasterNumBlocks ) {
4385 blin2 = 1;
4386 }
4387 else {
4388 blin2++;
4389 }
4390 LOCK(Bin2->T.SB.MasterBlockLock[blin2]);
4391 Bin2->T.SB.MasterBlock = blin2;
4392 term2 = Bin2->T.SB.MasterStart[blin2];
4393 }
4394/*
4395 #] Two is smallest :
4396*/
4397 }
4398 else {
4399/*
4400 #[ Equal :
4401*/
4402 Bin1->T.SB.BlockTerms[blin1]--;
4403 Bin2->T.SB.BlockTerms[blin2]--;
4404 l1 = *( m1 = term1 );
4405 l2 = *( m2 = term2 );
4406 if ( S->PolyWise ) { /* Here we work with PolyFun */
4407 tt1 = m1;
4408 m1 += S->PolyWise;
4409 m2 += S->PolyWise;
4410 if ( S->PolyFlag == 2 ) {
4411 AT.WorkPointer = wp;
4412 w = poly_ratfun_add(BHEAD m1,m2);
4413 if ( *tt1 + w[1] - m1[1] > AM.MaxTer/((LONG)sizeof(WORD)) ) {
4414 MLOCK(ErrorMessageLock);
4415 MesPrint("Term too complex in PolyRatFun addition. MaxTermSize of %10l is too small",AM.MaxTer);
4416 MUNLOCK(ErrorMessageLock);
4417 Terminate(-1);
4418 }
4419 AT.WorkPointer = wp;
4420 if ( w[FUNHEAD] == -SNUMBER && w[FUNHEAD+1] == 0 && w[1] > FUNHEAD ) {
4421 goto cancelled;
4422 }
4423 }
4424 else {
4425 w = wp;
4426 if ( w + m1[1] + m2[1] > AT.WorkTop ) {
4427 MLOCK(ErrorMessageLock);
4428 MesPrint("SortBotMerge(%d): A Maxtermsize of %10l is too small",
4429 AT.identity,AM.MaxTer/sizeof(WORD));
4430 MesPrint("m1[1] = %d, m2[1] = %d, Space = %l",m1[1],m2[1],(LONG)(AT.WorkTop-wp));
4431 PrintTerm(term1,"term1");
4432 PrintTerm(term2,"term2");
4433 MesPrint("PolyWise = %d",S->PolyWise);
4434 MUNLOCK(ErrorMessageLock);
4435 Terminate(-1);
4436 }
4437 AddArgs(BHEAD m1,m2,w);
4438 }
4439 r1 = w[1];
4440 if ( r1 <= FUNHEAD
4441 || ( w[FUNHEAD] == -SNUMBER && w[FUNHEAD+1] == 0 ) )
4442 { goto cancelled; }
4443 if ( r1 == m1[1] ) {
4444 NCOPY(m1,w,r1);
4445 }
4446 else if ( r1 < m1[1] ) {
4447 r2 = m1[1] - r1;
4448 m2 = w + r1;
4449 m1 += m1[1];
4450 while ( --r1 >= 0 ) *--m1 = *--m2;
4451 m2 = m1 - r2;
4452 r1 = S->PolyWise;
4453 while ( --r1 >= 0 ) *--m1 = *--m2;
4454 *m1 -= r2;
4455 term1 = m1;
4456 }
4457 else {
4458 // Here we are writing the new merged term *before* the original start of term1.
4459 // We can always do this, since before term1 there is previous term data of this
4460 // block, or the previous block, for which we are holding a lock. This requires
4461 // the existence of "block 0", if term1 is the first term of block 1!
4462 // It also requires the blocks to be contiguous in memory; we can't allocate
4463 // separate memory regions for each block without larger-scale changes.
4464 r2 = r1 - m1[1];
4465 m2 = tt1 - r2;
4466 r1 = S->PolyWise;
4467 m1 = tt1;
4468 *m1 += r2;
4469 term1 = m2;
4470 NCOPY(m2,m1,r1);
4471 r1 = w[1];
4472 NCOPY(m2,w,r1);
4473 }
4474 }
4475#ifdef WITHFLOAT
4476 else if ( AT.SortFloatMode ) {
4477/*
4478 The terms are in m1/term1 and m2/term2 and their length in l1 and l2.
4479 We have to locate the floats which have already been
4480 verified in the compare routine. Once we have the new
4481 term we can jump to the code that writes away the
4482 result after adding the rationals.
4483*/
4484 tstop1 = m1+l1; size1 = tstop1[-1]; tstop1 -= ABS(size1);
4485 tstop2 = m2+l2; size2 = tstop2[-1]; tstop2 -= ABS(size2);
4486 if ( AT.SortFloatMode == 3 ) {
4487 fun1 = m1+1;
4488 while ( fun1[0] != FLOATFUN && fun1+fun1[1] < tstop1 ) {
4489 fun1 += fun1[1];
4490 }
4491 if ( size1 < 0 ) fun1[FUNHEAD+3] = -fun1[FUNHEAD+3];
4492 UnpackFloat(aux1,fun1);
4493 fun2 = m2+1;
4494 while ( fun2[0] != FLOATFUN && fun2+fun2[1] < tstop2 ) {
4495 fun2 += fun2[1];
4496 }
4497 if ( size2 < 0 ) fun2[FUNHEAD+3] = -fun2[FUNHEAD+3];
4498 UnpackFloat(aux2,fun2);
4499 }
4500 else if ( AT.SortFloatMode == 1 ) {
4501 fun1 = m1+1;
4502 while ( fun1[0] != FLOATFUN && fun1+fun1[1] < tstop1 ) {
4503 fun1 += fun1[1];
4504 }
4505 if ( size1 < 0 ) fun1[FUNHEAD+3] = -fun1[FUNHEAD+3];
4506 UnpackFloat(aux1,fun1);
4507 fun2 = tstop2;
4508 RatToFloat(aux2,(UWORD *)fun2,size2);
4509 }
4510 else if ( AT.SortFloatMode == 2 ) {
4511 fun1 = tstop1;
4512 RatToFloat(aux1,(UWORD *)fun1,size1);
4513 fun2 = m2+1;
4514 while ( fun2[0] != FLOATFUN && fun2+fun2[1] < tstop2 ) {
4515 fun2 += fun2[1];
4516 }
4517 if ( size2 < 0 ) fun2[FUNHEAD+3] = -fun2[FUNHEAD+3];
4518 UnpackFloat(aux2,fun2);
4519 }
4520 else {
4521 MLOCK(ErrorMessageLock);
4522 MesPrint("Illegal value %d for AT.SortFloatMode in SortBotMerge.",AT.SortFloatMode);
4523 MUNLOCK(ErrorMessageLock);
4524 Terminate(-1);
4525 return(0);
4526 }
4527 mpf_add(aux3,aux1,aux2);
4528 size3 = mpf_sgn(aux3);
4529 if ( size3 == 0 ) { /* Cancelling! Rare! */
4530 goto cancelled;
4531 }
4532 else if ( size3 < 0 ) mpf_neg(aux3,aux3);
4533 fun3 = TermMalloc("MasterMerge");
4534 PackFloat(fun3,aux3);
4535 l3 = fun3[1]+(fun1-m1)+3; /* new size */
4536
4537 if ( l3 != l1 ) {
4538/*
4539 Copy it all to wp
4540*/
4541 if ( (l3)*((LONG)sizeof(WORD)) >= AM.MaxTer ) {
4542 MLOCK(ErrorMessageLock);
4543 MesPrint("MasterMerge: Coefficient overflow during sort");
4544 MUNLOCK(ErrorMessageLock);
4545 goto ReturnError;
4546 }
4547 m3 = wp; m2 = term1;
4548 while ( m2 < fun1 ) *m3++ = *m2++;
4549 for ( jj = 0; jj < fun3[1]; jj++ ) *m3++ = fun3[jj];
4550 *m3++ = 1; *m3++ = 1;
4551 *m3++ = size3 < 0 ? -3: 3;
4552 *wp = m3-wp;
4553 TermFree(fun3,"MasterMerge");
4554 goto PutOutwp;
4555 }
4556 for ( jj = 0; jj < fun3[1]; jj++ ) fun1[jj] = fun3[jj];
4557 fun1 += fun3[1];
4558 *fun1++ = 1; *fun1++ = 1;
4559 *fun1++ = size3 < 0 ? -3: 3;
4560 *term1 = fun1-term1;
4561 TermFree(fun3,"MasterMerge");
4562 }
4563#endif
4564 else {
4565 r1 = *( m1 += l1 - 1 );
4566 m1 -= ABS(r1) - 1;
4567 r1 = ( ( r1 > 0 ) ? (r1-1) : (r1+1) ) >> 1;
4568 r2 = *( m2 += l2 - 1 );
4569 m2 -= ABS(r2) - 1;
4570 r2 = ( ( r2 > 0 ) ? (r2-1) : (r2+1) ) >> 1;
4571
4572 if ( AddRat(BHEAD (UWORD *)m1,r1,(UWORD *)m2,r2,coef,&r3) ) {
4573 MLOCK(ErrorMessageLock);
4574 MesCall("SortBotMerge");
4575 MUNLOCK(ErrorMessageLock);
4576 SETERROR(-1)
4577 }
4578
4579 if ( AN.ncmod != 0 ) {
4580 if ( ( AC.modmode & POSNEG ) != 0 ) {
4581 NormalModulus(coef,&r3);
4582 }
4583 else if ( BigLong(coef,r3,(UWORD *)AC.cmod,ABS(AN.ncmod)) >= 0 ) {
4584 SubPLon(coef,r3,(UWORD *)AC.cmod,ABS(AN.ncmod),coef,&r3);
4585 coef[r3] = 1;
4586 for ( ii = 1; ii < r3; ii++ ) coef[r3+ii] = 0;
4587 }
4588 }
4589 if ( !r3 ) { goto cancelled; }
4590 r3 *= 2;
4591 r33 = ( r3 > 0 ) ? ( r3 + 1 ) : ( r3 - 1 );
4592 if ( r3 < 0 ) r3 = -r3;
4593 if ( r1 < 0 ) r1 = -r1;
4594 r1 *= 2;
4595 r31 = r3 - r1;
4596 if ( !r31 ) { /* copy coef into term1 */
4597 m2 = (WORD *)coef; im = r3;
4598 NCOPY(m1,m2,im);
4599 *m1 = r33;
4600 }
4601 else {
4602 to = wp; from = term1;
4603 while ( from < m1 ) *to++ = *from++;
4604 from = (WORD *)coef; im = r3;
4605 NCOPY(to,from,im);
4606 *to++ = r33;
4607 wp[0] = to - wp;
4608PutOutwp:
4609 if ( SortBotOut(BHEAD wp) < 0 ) {
4610 MLOCK(ErrorMessageLock);
4611 MesPrint("Called from SortBotMerge with thread = %d",AT.identity);
4612 MUNLOCK(ErrorMessageLock);
4613 error = -1;
4614 goto ReturnError;
4615 }
4616 goto cancelled;
4617 }
4618 }
4619 if ( SortBotOut(BHEAD term1) < 0 ) {
4620 MLOCK(ErrorMessageLock);
4621 MesPrint("Called from SortBotMerge with thread = %d",AT.identity);
4622 MUNLOCK(ErrorMessageLock);
4623 error = -1;
4624 goto ReturnError;
4625 }
4626cancelled:; /* Now we need two new terms */
4627 term1 += *term1;
4628 if ( Bin1->T.SB.BlockTerms[blin1] == 0 ) {
4629
4630 if ( blin1 == 1 ) {
4631 UNLOCK(Bin1->T.SB.MasterBlockLock[Bin1->T.SB.MasterNumBlocks]);
4632 }
4633 else {
4634 UNLOCK(Bin1->T.SB.MasterBlockLock[blin1-1]);
4635 }
4636 if ( blin1 == Bin1->T.SB.MasterNumBlocks ) {
4637 blin1 = 1;
4638 }
4639 else {
4640 blin1++;
4641 }
4642 LOCK(Bin1->T.SB.MasterBlockLock[blin1]);
4643 Bin1->T.SB.MasterBlock = blin1;
4644 term1 = Bin1->T.SB.MasterStart[blin1];
4645 }
4646 goto next2;
4647/*
4648 #] Equal :
4649*/
4650 }
4651 }
4652/*
4653 Copy the tail
4654*/
4655 if ( *term1 ) {
4656/*
4657 #[ Tail in one :
4658*/
4659 while ( *term1 ) {
4660 Bin1->T.SB.BlockTerms[blin1]--;
4661 if ( SortBotOut(BHEAD term1) < 0 ) {
4662 MLOCK(ErrorMessageLock);
4663 MesPrint("Called from SortBotMerge with thread = %d",AT.identity);
4664 MUNLOCK(ErrorMessageLock);
4665 error = -1;
4666 goto ReturnError;
4667 }
4668 if ( Bin1->T.SB.BlockTerms[blin1] == 0 ) {
4669
4670 if ( blin1 == 1 ) {
4671 UNLOCK(Bin1->T.SB.MasterBlockLock[Bin1->T.SB.MasterNumBlocks]);
4672 }
4673 else {
4674 UNLOCK(Bin1->T.SB.MasterBlockLock[blin1-1]);
4675 }
4676 if ( blin1 == Bin1->T.SB.MasterNumBlocks ) {
4677 blin1 = 1;
4678 }
4679 else {
4680 blin1++;
4681 }
4682 LOCK(Bin1->T.SB.MasterBlockLock[blin1]);
4683 Bin1->T.SB.MasterBlock = blin1;
4684 term1 = Bin1->T.SB.MasterStart[blin1];
4685 }
4686 else {
4687 term1 += *term1;
4688 }
4689 }
4690/*
4691 #] Tail in one :
4692*/
4693 }
4694 else if ( *term2 ) {
4695/*
4696 #[ Tail in two :
4697*/
4698 while ( *term2 ) {
4699 Bin2->T.SB.BlockTerms[blin2]--;
4700 if ( SortBotOut(BHEAD term2) < 0 ) {
4701 MLOCK(ErrorMessageLock);
4702 MesPrint("Called from SortBotMerge with thread = %d",AT.identity);
4703 MUNLOCK(ErrorMessageLock);
4704 error = -1;
4705 goto ReturnError;
4706 }
4707 if ( Bin2->T.SB.BlockTerms[blin2] == 0 ) {
4708
4709 if ( blin2 == 1 ) {
4710 UNLOCK(Bin2->T.SB.MasterBlockLock[Bin2->T.SB.MasterNumBlocks]);
4711 }
4712 else {
4713 UNLOCK(Bin2->T.SB.MasterBlockLock[blin2-1]);
4714 }
4715 if ( blin2 == Bin2->T.SB.MasterNumBlocks ) {
4716 blin2 = 1;
4717 }
4718 else {
4719 blin2++;
4720 }
4721 LOCK(Bin2->T.SB.MasterBlockLock[blin2]);
4722 Bin2->T.SB.MasterBlock = blin2;
4723 term2 = Bin2->T.SB.MasterStart[blin2];
4724 }
4725 else {
4726 term2 += *term2;
4727 }
4728 }
4729/*
4730 #] Tail in two :
4731*/
4732 }
4733
4734 // Both streams have hit the end-of-stream marker "0". We still need to
4735 // decrement the BlockTerms counters a final time, the marker is included
4736 // in the count.
4737 Bin1->T.SB.BlockTerms[blin1]--;
4738 Bin2->T.SB.BlockTerms[blin2]--;
4739
4740 SortBotOut(BHEAD 0);
4741ReturnError:;
4742/*
4743 Release all locks.
4744*/
4745 UNLOCK(Bin1->T.SB.MasterBlockLock[blin1]);
4746 if ( blin1 > 1 ) {
4747 UNLOCK(Bin1->T.SB.MasterBlockLock[blin1-1]);
4748 }
4749 else {
4750 UNLOCK(Bin1->T.SB.MasterBlockLock[Bin1->T.SB.MasterNumBlocks]);
4751 }
4752 UNLOCK(Bin2->T.SB.MasterBlockLock[blin2]);
4753 if ( blin2 > 1 ) {
4754 UNLOCK(Bin2->T.SB.MasterBlockLock[blin2-1]);
4755 }
4756 else {
4757 UNLOCK(Bin2->T.SB.MasterBlockLock[Bin2->T.SB.MasterNumBlocks]);
4758 }
4759 if ( AT.identity > 0 ) {
4760 UNLOCK(AT.SB.MasterBlockLock[AT.SB.FillBlock]);
4761 }
4762/*
4763 And that was all folks
4764*/
4765 return(error);
4766}
4767
4768#endif
4769
4770/*
4771 #] SortBotMerge :
4772 #[ IniSortBlocks :
4773*/
4774
4775static int SortBlocksInitialized = 0;
4776
4783int IniSortBlocks(int numworkers)
4784{
4785 ALLPRIVATES *B;
4786 SORTING *S;
4787 LONG totalsize, workersize, blocksize, numberofterms;
4788 int maxter, id, j;
4789 int numberofblocks = NUMBEROFBLOCKSINSORT, numparts;
4790 WORD *w;
4791
4792 if ( SortBlocksInitialized ) return(0);
4793 SortBlocksInitialized = 1;
4794 if ( numworkers == 0 ) return(0);
4795
4796#ifdef WITHSORTBOTS
4797 if ( numworkers > 2 ) {
4798 numparts = 2*numworkers - 2;
4799 numberofblocks = numberofblocks/2;
4800 }
4801 else {
4802 numparts = numworkers;
4803 }
4804#else
4805 numparts = numworkers;
4806#endif
4807 S = AM.S0;
4808 totalsize = S->LargeSize + S->SmallEsize;
4809 workersize = totalsize / numparts;
4810 maxter = AM.MaxTer/sizeof(WORD);
4811 blocksize = ( workersize - maxter )/numberofblocks;
4812 numberofterms = blocksize / maxter;
4813 if ( numberofterms < MINIMUMNUMBEROFTERMS ) {
4814/*
4815 This should have been taken care of in RecalcSetups.
4816*/
4817 MesPrint("We have a problem with the size of the blocks in IniSortBlocks");
4818 Terminate(-1);
4819 }
4820/*
4821 Layout: For each worker
4822 block 0: size is maxter WORDS
4823 numberofblocks blocks of size blocksize WORDS
4824*/
4825 w = S->lBuffer;
4826 if ( w == 0 ) w = S->sBuffer;
4827 for ( id = 1; id <= numparts; id++ ) {
4828 B = AB[id];
4829 AT.SB.MasterBlockLock = (pthread_mutex_t *)Malloc1(
4830 sizeof(pthread_mutex_t)*(numberofblocks+1),"MasterBlockLock");
4831 AT.SB.MasterStart = (WORD **)Malloc1(sizeof(WORD *)*(numberofblocks+1)*3,"MasterBlock");
4832 AT.SB.MasterFill = AT.SB.MasterStart + (numberofblocks+1);
4833 AT.SB.MasterStop = AT.SB.MasterFill + (numberofblocks+1);
4834 AT.SB.MasterNumBlocks = numberofblocks;
4835 AT.SB.BlockTerms = (LONG*)Malloc1(sizeof(LONG)*(numberofblocks+1),"BlockTerms");
4836 AT.SB.MasterBlock = 0;
4837 AT.SB.FillBlock = 0;
4838 AT.SB.MasterFill[0] = AT.SB.MasterStart[0] = w;
4839 AT.SB.BlockTerms[0] = 0;
4840 w += maxter;
4841 AT.SB.MasterStop[0] = w;
4842 AT.SB.MasterBlockLock[0] = dummylock;
4843 for ( j = 1; j <= numberofblocks; j++ ) {
4844 AT.SB.MasterFill[j] = AT.SB.MasterStart[j] = w;
4845 AT.SB.BlockTerms[j] = 0;
4846 w += blocksize;
4847 AT.SB.MasterStop[j] = w;
4848 AT.SB.MasterBlockLock[j] = dummylock;
4849 }
4850 }
4851 if ( w > S->sTop2 ) {
4852 MesPrint("Counting problem in IniSortBlocks");
4853 Terminate(-1);
4854 }
4855 return(0);
4856}
4857
4858/*
4859 #] IniSortBlocks :
4860 #[ UpdateSortBlocks :
4861*/
4862
4867int UpdateSortBlocks(int numworkers)
4868{
4869 ALLPRIVATES *B;
4870 SORTING *S;
4871 LONG totalsize, workersize, blocksize, numberofterms;
4872 int maxter, id, j;
4873 int numberofblocks = NUMBEROFBLOCKSINSORT, numparts;
4874 WORD *w;
4875
4876 if ( numworkers == 0 ) return(0);
4877
4878#ifdef WITHSORTBOTS
4879 if ( numworkers > 2 ) {
4880 numparts = 2*numworkers - 2;
4881 numberofblocks = numberofblocks/2;
4882 }
4883 else {
4884 numparts = numworkers;
4885 }
4886#else
4887 numparts = numworkers;
4888#endif
4889 S = AM.S0;
4890 totalsize = S->LargeSize + S->SmallEsize;
4891 workersize = totalsize / numparts;
4892 maxter = AM.MaxTer/sizeof(WORD);
4893 blocksize = ( workersize - maxter )/numberofblocks;
4894 numberofterms = blocksize / maxter;
4895 if ( numberofterms < MINIMUMNUMBEROFTERMS ) {
4896/*
4897 This should have been taken care of in RecalcSetups.
4898*/
4899 MesPrint("We have a problem with the size of the blocks in UpdateSortBlocks");
4900 Terminate(-1);
4901 }
4902/*
4903 Layout: For each worker
4904 block 0: size is maxter WORDS
4905 numberofblocks blocks of size blocksize WORDS
4906*/
4907 w = S->lBuffer;
4908 if ( w == 0 ) w = S->sBuffer;
4909 for ( id = 1; id <= numparts; id++ ) {
4910 B = AB[id];
4911 AT.SB.MasterFill[0] = AT.SB.MasterStart[0] = w;
4912 w += maxter;
4913 AT.SB.MasterStop[0] = w;
4914 for ( j = 1; j <= numberofblocks; j++ ) {
4915 AT.SB.MasterFill[j] = AT.SB.MasterStart[j] = w;
4916 w += blocksize;
4917 AT.SB.MasterStop[j] = w;
4918 }
4919 }
4920 if ( w > S->sTop2 ) {
4921 MesPrint("Counting problem in UpdateSortBlocks");
4922 Terminate(-1);
4923 }
4924 return(0);
4925}
4926
4927/*
4928 #] UpdateSortBlocks :
4929 #[ DefineSortBotTree :
4930*/
4931
4932#ifdef WITHSORTBOTS
4933
4939void DefineSortBotTree(void)
4940{
4941 ALLPRIVATES *B;
4942 int n, i, from;
4943 if ( numberofworkers <= 2 ) return;
4944 n = numberofworkers*2-2;
4945 for ( i = numberofworkers+1, from = 1; i <= n; i++ ) {
4946 B = AB[i];
4947 AT.SortBotIn1 = from++;
4948 AT.SortBotIn2 = from++;
4949 }
4950 B = AB[0];
4951 AT.SortBotIn1 = from++;
4952 AT.SortBotIn2 = from++;
4953}
4954
4955#endif
4956
4957/*
4958 #] DefineSortBotTree :
4959 #[ GetTerm2 :
4960
4961 Routine does a GetTerm but only when a bracket index is involved and
4962 only from brackets that have been judged not suitable for treatment
4963 as complete brackets by a single worker. Whether or not a bracket should
4964 be treated by a single worker is decided by TreatIndexEntry
4965*/
4966
4967WORD GetTerm2(PHEAD WORD *term)
4968{
4969 WORD *ttco, *tt, retval;
4970 LONG n,i;
4971 FILEHANDLE *fi;
4972 EXPRESSIONS e = AN.expr;
4973 BRACKETINFO *b = e->bracketinfo;
4974 BRACKETINDEX *bi = b->indexbuffer;
4975 POSITION where, eonfile = AS.OldOnFile[e-Expressions], bstart, bnext;
4976/*
4977 1: Get the current position.
4978*/
4979 switch ( e->status ) {
4980 case UNHIDELEXPRESSION:
4981 case UNHIDEGEXPRESSION:
4982 case DROPHLEXPRESSION:
4983 case DROPHGEXPRESSION:
4984 case HIDDENLEXPRESSION:
4985 case HIDDENGEXPRESSION:
4986 fi = AR.hidefile;
4987 break;
4988 default:
4989 fi = AR.infile;
4990 break;
4991 }
4992 if ( AR.KeptInHold ) {
4993 retval = GetTerm(BHEAD term);
4994 return(retval);
4995 }
4996 SeekScratch(fi,&where);
4997 if ( AN.lastinindex < 0 ) {
4998/*
4999 We have to test whether we have to do the first bracket
5000*/
5001 if ( ( n = TreatIndexEntry(BHEAD 0) ) <= 0 ) {
5002 AN.lastinindex = n;
5003 where = bi[n].start;
5004 ADD2POS(where,eonfile);
5005 SetScratch(fi,&where);
5006/*
5007 Put the bracket in the Compress buffer.
5008*/
5009 ttco = AR.CompressBuffer;
5010 tt = b->bracketbuffer + bi[0].bracket;
5011 i = *tt;
5012 NCOPY(ttco,tt,i)
5013 AR.CompressPointer = ttco;
5014 retval = GetTerm(BHEAD term);
5015 return(retval);
5016 }
5017 else AN.lastinindex = n-1;
5018 }
5019/*
5020 2: Find the corresponding index number
5021 a: test whether it is in the current bracket
5022*/
5023 n = AN.lastinindex;
5024 bstart = bi[n].start;
5025 ADD2POS(bstart,eonfile);
5026 bnext = bi[n].next;
5027 ADD2POS(bnext,eonfile);
5028 if ( ISLESSPOS(bstart,where) && ISLESSPOS(where,bnext) ) {
5029 retval = GetTerm(BHEAD term);
5030 return(retval);
5031 }
5032 for ( n++ ; n < b->indexfill; n++ ) {
5033 i = TreatIndexEntry(BHEAD n);
5034 if ( i <= 0 ) {
5035/*
5036 Put the bracket in the Compress buffer.
5037*/
5038 ttco = AR.CompressBuffer;
5039 tt = b->bracketbuffer + bi[n].bracket;
5040 i = *tt;
5041 NCOPY(ttco,tt,i)
5042 AR.CompressPointer = ttco;
5043 AN.lastinindex = n;
5044 where = bi[n].start;
5045 ADD2POS(where,eonfile);
5046 SetScratch(fi,&(where));
5047 retval = GetTerm(BHEAD term);
5048 return(retval);
5049 }
5050 else n += i - 1;
5051 }
5052 return(0);
5053}
5054
5055/*
5056 #] GetTerm2 :
5057 #[ TreatIndexEntry :
5058*/
5067int TreatIndexEntry(PHEAD LONG n)
5068{
5069 BRACKETINFO *b = AN.expr->bracketinfo;
5070 LONG numbra = b->indexfill - 1, i;
5071 LONG totterms;
5072 BRACKETINDEX *bi;
5073 POSITION pos1, average;
5074/*
5075 1: number of the bracket should be such that there is one bucket
5076 for each worker remaining.
5077*/
5078 if ( ( numbra - n ) <= numberofworkers ) return(0);
5079/*
5080 2: size of the bracket contents should be less than what remains in
5081 the expression divided by the number of workers.
5082*/
5083 bi = b->indexbuffer;
5084 DIFPOS(pos1,bi[numbra].next,bi[n].next); /* Size of what remains */
5085 BASEPOSITION(average) = DIVPOS(pos1,(3*numberofworkers));
5086 DIFPOS(pos1,bi[n].next,bi[n].start); /* Size of the current bracket */
5087 if ( ISLESSPOS(average,pos1) ) return(0);
5088/*
5089 It passes.
5090 Now check whether we can do more brackets
5091*/
5092 totterms = bi->termsinbracket;
5093 if ( totterms > 2*AC.ThreadBucketSize ) return(1);
5094 for ( i = 1; i < numbra-n; i++ ) {
5095 DIFPOS(pos1,bi[n+i].next,bi[n].start); /* Size of the combined brackets */
5096 if ( ISLESSPOS(average,pos1) ) return(i);
5097 totterms += bi->termsinbracket;
5098 if ( totterms > 2*AC.ThreadBucketSize ) return(i+1);
5099 }
5100/*
5101 We have a problem at the end of the system. Just do one.
5102*/
5103 return(1);
5104}
5105
5106/*
5107 #] TreatIndexEntry :
5108 #[ SetHideFiles :
5109*/
5110
5111void SetHideFiles(void) {
5112 int i;
5113 ALLPRIVATES *B, *B0 = AB[0];
5114 for ( i = 1; i <= numberofworkers; i++ ) {
5115 B = AB[i];
5116 AR.hidefile->handle = AR0.hidefile->handle;
5117 if ( AR.hidefile->handle < 0 ) {
5118 AR.hidefile->PObuffer = AR0.hidefile->PObuffer;
5119 AR.hidefile->POstop = AR0.hidefile->POstop;
5120 AR.hidefile->POfill = AR0.hidefile->POfill;
5121 AR.hidefile->POfull = AR0.hidefile->POfull;
5122 AR.hidefile->POsize = AR0.hidefile->POsize;
5123 AR.hidefile->POposition = AR0.hidefile->POposition;
5124 AR.hidefile->filesize = AR0.hidefile->filesize;
5125 }
5126 else {
5127 AR.hidefile->PObuffer = AR.hidefile->wPObuffer;
5128 AR.hidefile->POstop = AR.hidefile->wPOstop;
5129 AR.hidefile->POfill = AR.hidefile->wPOfill;
5130 AR.hidefile->POfull = AR.hidefile->wPOfull;
5131 AR.hidefile->POsize = AR.hidefile->wPOsize;
5132 PUTZERO(AR.hidefile->POposition);
5133 }
5134 }
5135}
5136
5137/*
5138 #] SetHideFiles :
5139 #[ IniFbufs :
5140*/
5141
5142void IniFbufs(void)
5143{
5144 int i;
5145 for ( i = 0; i < AM.totalnumberofthreads; i++ ) {
5146 IniFbuffer(AB[i]->T.fbufnum);
5147 }
5148}
5149
5150/*
5151 #] IniFbufs :
5152 #[ SetMods :
5153*/
5154
5155void SetMods(void)
5156{
5157 ALLPRIVATES *B;
5158 int i, n, j;
5159 for ( j = 0; j < AM.totalnumberofthreads; j++ ) {
5160 B = AB[j];
5161 AN.ncmod = AC.ncmod;
5162 if ( AN.cmod != 0 ) M_free(AN.cmod,"AN.cmod");
5163 n = ABS(AN.ncmod);
5164 AN.cmod = (UWORD *)Malloc1(sizeof(WORD)*n,"AN.cmod");
5165 for ( i = 0; i < n; i++ ) AN.cmod[i] = AC.cmod[i];
5166 }
5167}
5168
5169/*
5170 #] SetMods :
5171 #[ UnSetMods :
5172*/
5173
5174void UnSetMods(void)
5175{
5176 ALLPRIVATES *B;
5177 int j;
5178 for ( j = 0; j < AM.totalnumberofthreads; j++ ) {
5179 B = AB[j];
5180 if ( AN.cmod != 0 ) M_free(AN.cmod,"AN.cmod");
5181 AN.cmod = 0;
5182 }
5183}
5184
5185/*
5186 #] UnSetMods :
5187 #[ find_Horner_MCTS_expand_tree_threaded :
5188*/
5189
5190void find_Horner_MCTS_expand_tree_threaded(void) {
5191 int id;
5192 while (( id = GetAvailableThread() ) < 0)
5193 MasterWait();
5194 WakeupThread(id,MCTSEXPANDTREE);
5195}
5196
5197/*
5198 #] find_Horner_MCTS_expand_tree_threaded :
5199 #[ optimize_expression_given_Horner_threaded :
5200*/
5201
5202extern void optimize_expression_given_Horner_threaded(void) {
5203 int id;
5204 while (( id = GetAvailableThread() ) < 0)
5205 MasterWait();
5206 WakeupThread(id,OPTIMIZEEXPRESSION);
5207}
5208
5209/*
5210 #] optimize_expression_given_Horner_threaded :
5211*/
5212
5213#endif
int inicbufs(void)
Definition comtool.c:47
int IniFbuffer(WORD bufnum)
Definition comtool.c:614
void AddArgs(PHEAD WORD *, WORD *, WORD *)
Definition sort.c:2048
WORD * poly_ratfun_add(PHEAD WORD *, WORD *)
Definition polywrap.cc:633
int poly_unfactorize_expression(EXPRESSIONS)
Definition polywrap.cc:1535
WORD PutOut(PHEAD WORD *, POSITION *, FILEHANDLE *, WORD)
Definition sort.c:1171
LONG EndSort(PHEAD WORD *, int)
Definition sort.c:454
int Generator(PHEAD WORD *, WORD)
Definition proces.c:3249
void LowerSortLevel(void)
Definition sort.c:4661
int StoreTerm(PHEAD WORD *)
Definition sort.c:4244
int poly_factorize_expression(EXPRESSIONS)
Definition polywrap.cc:1178
void WriteStats(POSITION *, WORD, WORD)
Definition sort.c:129
int NewSort(PHEAD0)
Definition sort.c:359
int NormalModulus(UWORD *, WORD *)
Definition reken.c:1404
int FlushOut(POSITION *, FILEHANDLE *, int)
Definition sort.c:1533
WORD Compare1(PHEAD WORD *, WORD *, WORD)
Definition sort.c:2341
LONG TimeCPU(WORD)
Definition tools.c:3487
void optimize_expression_given_Horner()
Definition optimize.cc:4060
BRACKETINDEX * indexbuffer
Definition structs.h:323
WORD * bracketbuffer
Definition structs.h:324
WORD ** rhs
Definition structs.h:975
WORD ** lhs
Definition structs.h:974
WORD * Buffer
Definition structs.h:971
WORD * Pointer
Definition structs.h:973
int handle
Definition structs.h:709
struct NeStInG * NESTING
struct StOrEcAcHe * STORECACHE