~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/kernel/sched.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *  linux/kernel/sched.c
  3  *
  4  *  Kernel scheduler and related syscalls
  5  *
  6  *  Copyright (C) 1991, 1992  Linus Torvalds
  7  *
  8  *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
  9  *              make semaphores SMP safe
 10  *  1998-11-19  Implemented schedule_timeout() and related stuff
 11  *              by Andrea Arcangeli
 12  *  1998-12-28  Implemented better SMP scheduling by Ingo Molnar
 13  */
 14 
 15 /*
 16  * 'sched.c' is the main kernel file. It contains scheduling primitives
 17  * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 18  * call functions (such as getpid()), which just extract a field from
 19  * the current task.
 20  */
 21 
 22 #include <linux/config.h>
 23 #include <linux/mm.h>
 24 #include <linux/init.h>
 25 #include <linux/smp_lock.h>
 26 #include <linux/interrupt.h>
 27 #include <linux/kernel_stat.h>
 28 
 29 #include <asm/uaccess.h>
 30 #include <asm/mmu_context.h>
 31 
 32 extern void timer_bh(void);     /* NOTE(review): the three *_bh routines are only declared here; */
 33 extern void tqueue_bh(void);    /* no use of them is visible in this part of the file -- confirm */
 34 extern void immediate_bh(void); /* where they are defined and installed.                         */
 35 
 36 /*
 37  * scheduler variables
 38  */
 39 
 40 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
 41 
 42 extern void mem_use(void);      /* NOTE(review): declared only; not referenced in the visible part of this file */
 43 
 /*
  * Scheduling quanta.
  *
  * A task's "nice" value determines the size of the time slice it is
  * handed.  Nice runs from -20 (highest priority) to +19 (lowest
  * priority).
  *
  * The slice is meant to be roughly 50ms, so the tick count is scaled
  * by a power of two chosen from HZ:
  *
  *      HZ <  200       x / 4
  *      HZ <  400       x / 2
  *      HZ <  800       x
  *      HZ < 1600       x * 2
  *      otherwise       x * 4
  */
#if HZ >= 1600
#define TICK_SCALE(x)   ((x) << 2)
#elif HZ >= 800
#define TICK_SCALE(x)   ((x) << 1)
#elif HZ >= 400
#define TICK_SCALE(x)   (x)
#elif HZ >= 200
#define TICK_SCALE(x)   ((x) >> 1)
#else
#define TICK_SCALE(x)   ((x) >> 2)
#endif

/* Ticks granted for a given nice value: always at least one. */
#define NICE_TO_TICKS(nice)     (TICK_SCALE(20 - (nice)) + 1)
 68 
 69 
 70 /*
 71  *      Init task must be ok at boot for the ix86 as we will check its signals
 72  *      via the SMP irq return path.
 73  */
 74  
 75 struct task_struct * init_tasks[NR_CPUS] = {&init_task, };  /* slot 0 is &init_task; the SMP idle_task() macro indexes this array */
 76 
 77 /*
 78  * The tasklist_lock protects the linked list of processes.
 79  *
 80  * The runqueue_lock locks the parts that actually access
 81  * and change the run-queues, and have to be interrupt-safe.
 82  *
 83  * If both locks are to be concurrently held, the runqueue_lock
 84  * nests inside the tasklist_lock.
 85  */
 86 spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;  /* inner */
 87 rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;  /* outer */
 88 
 89 static LIST_HEAD(runqueue_head);        /* head of the run-queue list; tasks are linked in through ->run_list */
 90 
 91 /*
 92  * We align per-CPU scheduling data on cacheline boundaries,
 93  * to prevent cacheline ping-pong.
 94  */
 95 static union {                  /* one entry per CPU, padded out to SMP_CACHE_BYTES by __pad */
 96         struct schedule_data {
 97                 struct task_struct * curr;      /* task chosen by the last schedule() on this CPU */
 98                 cycles_t last_schedule;         /* get_cycles() stamp taken by schedule() (SMP builds only) */
 99         } schedule_data;
100         char __pad [SMP_CACHE_BYTES];
101 } aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};    /* entry 0 starts out with curr = &init_task */
102 
103 #define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr    /* lvalue: per-CPU 'curr' field */
104 #define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule      /* lvalue: per-CPU stamp */
105 
106 struct kernel_stat kstat;       /* schedule() increments kstat.context_swtch on every task switch */
107 
#ifdef CONFIG_SMP

/*
 * idle_task(cpu):  the idle task belonging to @cpu, looked up in
 *                  init_tasks[] through cpu_number_map().
 *
 * can_schedule(p, cpu):  non-zero when @p is not currently marked as
 *                  holding a CPU (has_cpu clear) and bit @cpu is set in
 *                  p->cpus_allowed.
 *
 * The mask is built with an unsigned long shift and a parenthesized
 * argument so that expression arguments expand correctly and high CPU
 * numbers do not overflow a plain int.
 * NOTE(review): assumes cpus_allowed is an unsigned long bitmask --
 * confirm against the task_struct definition.
 */
#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
#define can_schedule(p,cpu) ((!(p)->has_cpu) && \
                                ((p)->cpus_allowed & (1UL << (cpu))))

#else

/* Uniprocessor: a single idle task, and every task may run here. */
#define idle_task(cpu) (&init_task)
#define can_schedule(p,cpu) (1)

#endif
120 
/*
 * Empty function.  NOTE(review): by its name it marks the start of the
 * scheduling functions in the text section; confirm who relies on it.
 */
void scheduling_functions_start_here(void)
{
}
122 
123 /*
124  * This is the function that decides how desirable a process is..
125  * You can weigh different processes against each other depending
126  * on what CPU they've run on lately etc to try to handle cache
127  * and TLB miss penalties.
128  *
129  * Return values:
130  *       -1000: never select this
131  *           0: out of time, recalculate counters (but it might still be
132  *              selected)
133  *         +ve: "goodness" value (the larger, the better)
134  *       +1000: realtime process, select this.
135  */
136 
137 static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
138 {
139         int weight;
140 
141         /*
142          * select the current process after every other
143          * runnable process, but before the idle thread.
144          * Also, dont trigger a counter recalculation.
145          */
146         weight = -1;
147         if (p->policy & SCHED_YIELD)
148                 goto out;
149 
150         /*
151          * Non-RT process - normal case first.
152          */
153         if (p->policy == SCHED_OTHER) {
154                 /*
155                  * Give the process a first-approximation goodness value
156                  * according to the number of clock-ticks it has left.
157                  *
158                  * Don't do any other calculations if the time slice is
159                  * over..
160                  */
161                 weight = p->counter;
162                 if (!weight)
163                         goto out;
164                         
165 #ifdef CONFIG_SMP
166                 /* Give a largish advantage to the same processor...   */
167                 /* (this is equivalent to penalizing other processors) */
168                 if (p->processor == this_cpu)
169                         weight += PROC_CHANGE_PENALTY;
170 #endif
171 
172                 /* .. and a slight advantage to the current MM */
173                 if (p->mm == this_mm || !p->mm)
174                         weight += 1;
175                 weight += 20 - p->nice;
176                 goto out;
177         }
178 
179         /*
180          * Realtime process, select the first one on the
181          * runqueue (taking priorities within processes
182          * into account).
183          */
184         weight = 1000 + p->rt_priority;
185 out:
186         return weight;
187 }
188 
189 /*
190  * the 'goodness value' of replacing a process on a given CPU.
191  * positive value means 'replace', zero or negative means 'dont'.
192  */
193 static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
194 {
195         return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
196 }
197 
198 /*
199  * This is ugly, but reschedule_idle() is very timing-critical.
200  * We are called with the runqueue spinlock held and we must
201  * not claim the tasklist_lock.
202  */
203 static FASTCALL(void reschedule_idle(struct task_struct * p));
204 
205 static void reschedule_idle(struct task_struct * p)
206 {
207 #ifdef CONFIG_SMP
208         int this_cpu = smp_processor_id();
209         struct task_struct *tsk, *target_tsk;
210         int cpu, best_cpu, i, max_prio;
211         cycles_t oldest_idle;
212 
213         /*
214          * shortcut if the woken up task's last CPU is
215          * idle now.
216          */
217         best_cpu = p->processor;
218         if (can_schedule(p, best_cpu)) {
219                 tsk = idle_task(best_cpu);
220                 if (cpu_curr(best_cpu) == tsk) {
221                         int need_resched;
222 send_now_idle:
223                         /*
224                          * If need_resched == -1 then we can skip sending
225                          * the IPI altogether, tsk->need_resched is
226                          * actively watched by the idle thread.
227                          */
228                         need_resched = tsk->need_resched;
229                         tsk->need_resched = 1;
230                         if ((best_cpu != this_cpu) && !need_resched)
231                                 smp_send_reschedule(best_cpu);
232                         return;
233                 }
234         }
235 
236         /*
237          * We know that the preferred CPU has a cache-affine current
238          * process, lets try to find a new idle CPU for the woken-up
239          * process. Select the least recently active idle CPU. (that
240          * one will have the least active cache context.) Also find
241          * the executing process which has the least priority.
242          */
243         oldest_idle = (cycles_t) -1;
244         target_tsk = NULL;
245         max_prio = 1;
246 
247         for (i = 0; i < smp_num_cpus; i++) {
248                 cpu = cpu_logical_map(i);
249                 if (!can_schedule(p, cpu))
250                         continue;
251                 tsk = cpu_curr(cpu);
252                 /*
253                  * We use the first available idle CPU. This creates
254                  * a priority list between idle CPUs, but this is not
255                  * a problem.
256                  */
257                 if (tsk == idle_task(cpu)) {
258                         if (last_schedule(cpu) < oldest_idle) {
259                                 oldest_idle = last_schedule(cpu);
260                                 target_tsk = tsk;
261                         }
262                 } else {
263                         if (oldest_idle == -1ULL) {
264                                 int prio = preemption_goodness(tsk, p, cpu);
265 
266                                 if (prio > max_prio) {
267                                         max_prio = prio;
268                                         target_tsk = tsk;
269                                 }
270                         }
271                 }
272         }
273         tsk = target_tsk;
274         if (tsk) {
275                 if (oldest_idle != -1ULL) {
276                         best_cpu = tsk->processor;
277                         goto send_now_idle;
278                 }
279                 tsk->need_resched = 1;
280                 if (tsk->processor != this_cpu)
281                         smp_send_reschedule(tsk->processor);
282         }
283         return;
284                 
285 
286 #else /* UP */
287         int this_cpu = smp_processor_id();
288         struct task_struct *tsk;
289 
290         tsk = cpu_curr(this_cpu);
291         if (preemption_goodness(tsk, p, this_cpu) > 1)
292                 tsk->need_resched = 1;
293 #endif
294 }
295 
296 /*
297  * Careful!
298  *
299  * This has to add the process to the _beginning_ of the
300  * run-queue, not the end. See the comment about "This is
301  * subtle" in the scheduler proper..
302  */
303 static inline void add_to_runqueue(struct task_struct * p)
304 {
305         list_add(&p->run_list, &runqueue_head);
306         nr_running++;
307 }
308 
309 static inline void move_last_runqueue(struct task_struct * p)
310 {
311         list_del(&p->run_list);
312         list_add_tail(&p->run_list, &runqueue_head);
313 }
314 
315 static inline void move_first_runqueue(struct task_struct * p)
316 {
317         list_del(&p->run_list);
318         list_add(&p->run_list, &runqueue_head);
319 }
320 
321 /*
322  * Wake up a process. Put it on the run-queue if it's not
323  * already there.  The "current" process is always on the
324  * run-queue (except when the actual re-schedule is in
325  * progress), and as such you're allowed to do the simpler
326  * "current->state = TASK_RUNNING" to mark yourself runnable
327  * without the overhead of this.
328  */
329 inline void wake_up_process(struct task_struct * p)
330 {
331         unsigned long flags;
332 
333         /*
334          * We want the common case fall through straight, thus the goto.
335          */
336         spin_lock_irqsave(&runqueue_lock, flags);
337         p->state = TASK_RUNNING;
338         if (task_on_runqueue(p))
339                 goto out;
340         add_to_runqueue(p);
341         reschedule_idle(p);
342 out:
343         spin_unlock_irqrestore(&runqueue_lock, flags);
344 }
345 
346 static inline void wake_up_process_synchronous(struct task_struct * p)
347 {
348         unsigned long flags;
349 
350         /*
351          * We want the common case fall through straight, thus the goto.
352          */
353         spin_lock_irqsave(&runqueue_lock, flags);
354         p->state = TASK_RUNNING;
355         if (task_on_runqueue(p))
356                 goto out;
357         add_to_runqueue(p);
358 out:
359         spin_unlock_irqrestore(&runqueue_lock, flags);
360 }
361 
/*
 * Timer callback installed by schedule_timeout(): @__data carries the
 * task pointer of the sleeper, which is woken up.
 */
static void process_timeout(unsigned long __data)
{
        wake_up_process((struct task_struct *) __data);
}
368 
369 signed long schedule_timeout(signed long timeout)
370 {
371         struct timer_list timer;
372         unsigned long expire;
373 
374         switch (timeout)
375         {
376         case MAX_SCHEDULE_TIMEOUT:
377                 /*
378                  * These two special cases are useful to be comfortable
379                  * in the caller. Nothing more. We could take
380                  * MAX_SCHEDULE_TIMEOUT from one of the negative value
381                  * but I' d like to return a valid offset (>=0) to allow
382                  * the caller to do everything it want with the retval.
383                  */
384                 schedule();
385                 goto out;
386         default:
387                 /*
388                  * Another bit of PARANOID. Note that the retval will be
389                  * 0 since no piece of kernel is supposed to do a check
390                  * for a negative retval of schedule_timeout() (since it
391                  * should never happens anyway). You just have the printk()
392                  * that will tell you if something is gone wrong and where.
393                  */
394                 if (timeout < 0)
395                 {
396                         printk(KERN_ERR "schedule_timeout: wrong timeout "
397                                "value %lx from %p\n", timeout,
398                                __builtin_return_address(0));
399                         current->state = TASK_RUNNING;
400                         goto out;
401                 }
402         }
403 
404         expire = timeout + jiffies;
405 
406         init_timer(&timer);
407         timer.expires = expire;
408         timer.data = (unsigned long) current;
409         timer.function = process_timeout;
410 
411         add_timer(&timer);
412         schedule();
413         del_timer_sync(&timer);
414 
415         timeout = expire - jiffies;
416 
417  out:
418         return timeout < 0 ? 0 : timeout;
419 }
420 
421 /*
422  * schedule_tail() is getting called from the fork return path. This
423  * cleans up all remaining scheduler things, without impacting the
424  * common case.
425  */
426 static inline void __schedule_tail(struct task_struct *prev)
427 {
428 #ifdef CONFIG_SMP
429         int policy;
430 
431         /*
432          * prev->policy can be written from here only before `prev'
433          * can be scheduled (before setting prev->has_cpu to zero).
434          * Of course it must also be read before allowing prev
435          * to be rescheduled, but since the write depends on the read
436          * to complete, wmb() is enough. (the spin_lock() acquired
437          * before setting has_cpu is not enough because the spin_lock()
438          * common code semantics allows code outside the critical section
439          * to enter inside the critical section)
440          */
441         policy = prev->policy;
442         prev->policy = policy & ~SCHED_YIELD;
443         wmb();
444 
445         /*
446          * fast path falls through. We have to clear has_cpu before
447          * checking prev->state to avoid a wakeup race - thus we
448          * also have to protect against the task exiting early.
449          */
450         task_lock(prev);
451         prev->has_cpu = 0;
452         mb();
453         if (prev->state == TASK_RUNNING)
454                 goto needs_resched;
455 
456 out_unlock:
457         task_unlock(prev);      /* Synchronise here with release_task() if prev is TASK_ZOMBIE */
458         return;
459 
460         /*
461          * Slow path - we 'push' the previous process and
462          * reschedule_idle() will attempt to find a new
463          * processor for it. (but it might preempt the
464          * current process as well.) We must take the runqueue
465          * lock and re-check prev->state to be correct. It might
466          * still happen that this process has a preemption
467          * 'in progress' already - but this is not a problem and
468          * might happen in other circumstances as well.
469          */
470 needs_resched:
471         {
472                 unsigned long flags;
473 
474                 /*
475                  * Avoid taking the runqueue lock in cases where
476                  * no preemption-check is necessery:
477                  */
478                 if ((prev == idle_task(smp_processor_id())) ||
479                                                 (policy & SCHED_YIELD))
480                         goto out_unlock;
481 
482                 spin_lock_irqsave(&runqueue_lock, flags);
483                 if (prev->state == TASK_RUNNING)
484                         reschedule_idle(prev);
485                 spin_unlock_irqrestore(&runqueue_lock, flags);
486                 goto out_unlock;
487         }
488 #else
489         prev->policy &= ~SCHED_YIELD;
490 #endif /* CONFIG_SMP */
491 }
492 
/* Out-of-line entry point that simply runs __schedule_tail() on @prev. */
void schedule_tail(struct task_struct *prev)
{
        __schedule_tail(prev);
}
497 
498 /*
499  *  'schedule()' is the scheduler function. It's a very simple and nice
500  * scheduler: it's not perfect, but certainly works for most things.
501  *
502  * The goto is "interesting".
503  *
504  *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
505  * tasks can run. It can not be killed, and it cannot sleep. The 'state'
506  * information in task[0] is never used.
507  */
508 asmlinkage void schedule(void)
509 {
510         struct schedule_data * sched_data;
511         struct task_struct *prev, *next, *p;
512         struct list_head *tmp;
513         int this_cpu, c;
514 
515         if (!current->active_mm) BUG();
516 need_resched_back:
517         prev = current;
518         this_cpu = prev->processor;
519 
520         if (in_interrupt())
521                 goto scheduling_in_interrupt;
522 
523         release_kernel_lock(prev, this_cpu);
524 
525         /* Do "administrative" work here while we don't hold any locks */
526         if (softirq_active(this_cpu) & softirq_mask(this_cpu))
527                 goto handle_softirq;
528 handle_softirq_back:
529 
530         /*
531          * 'sched_data' is protected by the fact that we can run
532          * only one process per CPU.
533          */
534         sched_data = & aligned_data[this_cpu].schedule_data;
535 
536         spin_lock_irq(&runqueue_lock);
537 
538         /* move an exhausted RR process to be last.. */
539         if (prev->policy == SCHED_RR)
540                 goto move_rr_last;
541 move_rr_back:
542 
543         switch (prev->state) {
544                 case TASK_INTERRUPTIBLE:
545                         if (signal_pending(prev)) {
546                                 prev->state = TASK_RUNNING;
547                                 break;
548                         }
549                 default:
550                         del_from_runqueue(prev);
551                 case TASK_RUNNING:
552         }
553         prev->need_resched = 0;
554 
555         /*
556          * this is the scheduler proper:
557          */
558 
559 repeat_schedule:
560         /*
561          * Default process to select..
562          */
563         next = idle_task(this_cpu);
564         c = -1000;
565         if (prev->state == TASK_RUNNING)
566                 goto still_running;
567 
568 still_running_back:
569         list_for_each(tmp, &runqueue_head) {
570                 p = list_entry(tmp, struct task_struct, run_list);
571                 if (can_schedule(p, this_cpu)) {
572                         int weight = goodness(p, this_cpu, prev->active_mm);
573                         if (weight > c)
574                                 c = weight, next = p;
575                 }
576         }
577 
578         /* Do we need to re-calculate counters? */
579         if (!c)
580                 goto recalculate;
581         /*
582          * from this point on nothing can prevent us from
583          * switching to the next task, save this fact in
584          * sched_data.
585          */
586         sched_data->curr = next;
587 #ifdef CONFIG_SMP
588         next->has_cpu = 1;
589         next->processor = this_cpu;
590 #endif
591         spin_unlock_irq(&runqueue_lock);
592 
593         if (prev == next)
594                 goto same_process;
595 
596 #ifdef CONFIG_SMP
597         /*
598          * maintain the per-process 'last schedule' value.
599          * (this has to be recalculated even if we reschedule to
600          * the same process) Currently this is only used on SMP,
601          * and it's approximate, so we do not have to maintain
602          * it while holding the runqueue spinlock.
603          */
604         sched_data->last_schedule = get_cycles();
605 
606         /*
607          * We drop the scheduler lock early (it's a global spinlock),
608          * thus we have to lock the previous process from getting
609          * rescheduled during switch_to().
610          */
611 
612 #endif /* CONFIG_SMP */
613 
614         kstat.context_swtch++;
615         /*
616          * there are 3 processes which are affected by a context switch:
617          *
618          * prev == .... ==> (last => next)
619          *
620          * It's the 'much more previous' 'prev' that is on next's stack,
621          * but prev is set to (the just run) 'last' process by switch_to().
622          * This might sound slightly confusing but makes tons of sense.
623          */
624         prepare_to_switch();
625         {
626                 struct mm_struct *mm = next->mm;
627                 struct mm_struct *oldmm = prev->active_mm;
628                 if (!mm) {
629                         if (next->active_mm) BUG();
630                         next->active_mm = oldmm;
631                         atomic_inc(&oldmm->mm_count);
632                         enter_lazy_tlb(oldmm, next, this_cpu);
633                 } else {
634                         if (next->active_mm != mm) BUG();
635                         switch_mm(oldmm, mm, next, this_cpu);
636                 }
637 
638                 if (!prev->mm) {
639                         prev->active_mm = NULL;
640                         mmdrop(oldmm);
641                 }
642         }
643 
644         /*
645          * This just switches the register state and the
646          * stack.
647          */
648         switch_to(prev, next, prev);
649         __schedule_tail(prev);
650 
651 same_process:
652         reacquire_kernel_lock(current);
653         if (current->need_resched)
654                 goto need_resched_back;
655 
656         return;
657 
658 recalculate:
659         {
660                 struct task_struct *p;
661                 spin_unlock_irq(&runqueue_lock);
662                 read_lock(&tasklist_lock);
663                 for_each_task(p)
664                         p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
665                 read_unlock(&tasklist_lock);
666                 spin_lock_irq(&runqueue_lock);
667         }
668         goto repeat_schedule;
669 
670 still_running:
671         c = goodness(prev, this_cpu, prev->active_mm);
672         next = prev;
673         goto still_running_back;
674 
675 handle_softirq:
676         do_softirq();
677         goto handle_softirq_back;
678 
679 move_rr_last:
680         if (!prev->counter) {
681                 prev->counter = NICE_TO_TICKS(prev->nice);
682                 move_last_runqueue(prev);
683         }
684         goto move_rr_back;
685 
686 scheduling_in_interrupt:
687         printk("Scheduling in interrupt\n");
688         BUG();
689         return;
690 }
691 
692 static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
693                                      unsigned int wq_mode, const int sync)
694 {
695         struct list_head *tmp, *head;
696         struct task_struct *p, *best_exclusive;
697         unsigned long flags;
698         int best_cpu, irq;
699 
700         if (!q)
701                 goto out;
702 
703         best_cpu = smp_processor_id();
704         irq = in_interrupt();
705         best_exclusive = NULL;
706         wq_write_lock_irqsave(&q->lock, flags);
707 
708 #if WAITQUEUE_DEBUG
709         CHECK_MAGIC_WQHEAD(q);
710 #endif
711 
712         head = &q->task_list;
713 #if WAITQUEUE_DEBUG
714         if (!head->next || !head->prev)
715                 WQ_BUG();
716 #endif
717         tmp = head->next;
718         while (tmp != head) {
719                 unsigned int state;
720                 wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
721 
722                 tmp = tmp->next;
723 
724 #if WAITQUEUE_DEBUG
725                 CHECK_MAGIC(curr->__magic);
726 #endif
727                 p = curr->task;
728                 state = p->state;
729                 if (state & mode) {
730 #if WAITQUEUE_DEBUG
731                         curr->__waker = (long)__builtin_return_address(0);
732 #endif
733                         /*
734                          * If waking up from an interrupt context then
735                          * prefer processes which are affine to this
736                          * CPU.
737                          */
738                         if (irq && (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)) {
739                                 if (!best_exclusive)
740                                         best_exclusive = p;
741                                 if (p->processor == best_cpu) {
742                                         best_exclusive = p;
743                                         break;
744                                 }
745                         } else {
746                                 if (sync)
747                                         wake_up_process_synchronous(p);
748                                 else
749                                         wake_up_process(p);
750                                 if (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)
751                                         break;
752                         }
753                 }
754         }
755         if (best_exclusive) {
756                 if (sync)
757                         wake_up_process_synchronous(best_exclusive);
758                 else
759                         wake_up_process(best_exclusive);
760         }
761         wq_write_unlock_irqrestore(&q->lock, flags);
762 out:
763         return;
764 }
765 
766 void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
767 {
768         __wake_up_common(q, mode, wq_mode, 0);
769 }
770 
771 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
772 {
773         __wake_up_common(q, mode, wq_mode, 1);
774 }
775 
/*
 * Building blocks shared by the sleep_on() family below.  All three
 * expand to plain statement sequences and expect a wait_queue_head_t
 * pointer named 'q' in the enclosing function.
 *
 * SLEEP_ON_VAR declares 'flags' and 'wait' and initializes 'wait' for
 * the current task.  Because it ends in a statement, it must come
 * after all other declarations of the function.
 */
776 #define SLEEP_ON_VAR                            \
777         unsigned long flags;                    \
778         wait_queue_t wait;                      \
779         init_waitqueue_entry(&wait, current);
780 
/*
 * SLEEP_ON_HEAD queues 'wait' on q under q->lock.  The lock is taken
 * with the irqsave variant but released with plain wq_write_unlock();
 * the saved 'flags' are only restored by SLEEP_ON_TAIL.
 */
781 #define SLEEP_ON_HEAD                                   \
782         wq_write_lock_irqsave(&q->lock,flags);          \
783         __add_wait_queue(q, &wait);                     \
784         wq_write_unlock(&q->lock);
785 
/*
 * SLEEP_ON_TAIL removes 'wait' from q under q->lock and restores the
 * interrupt flags that SLEEP_ON_HEAD saved.
 */
786 #define SLEEP_ON_TAIL                                           \
787         wq_write_lock_irq(&q->lock);                            \
788         __remove_wait_queue(q, &wait);                          \
789         wq_write_unlock_irqrestore(&q->lock,flags);
790 
791 void interruptible_sleep_on(wait_queue_head_t *q)
792 {
793         SLEEP_ON_VAR
794 
795         current->state = TASK_INTERRUPTIBLE;
796 
797         SLEEP_ON_HEAD
798         schedule();
799         SLEEP_ON_TAIL
800 }
801 
802 long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
803 {
804         SLEEP_ON_VAR
805 
806         current->state = TASK_INTERRUPTIBLE;
807 
808         SLEEP_ON_HEAD
809         timeout = schedule_timeout(timeout);
810         SLEEP_ON_TAIL
811 
812         return timeout;
813 }
814 
815 void sleep_on(wait_queue_head_t *q)
816 {
817         SLEEP_ON_VAR
818         
819         current->state = TASK_UNINTERRUPTIBLE;
820 
821         SLEEP_ON_HEAD
822         schedule();
823         SLEEP_ON_TAIL
824 }
825 
826 long sleep_on_timeout(wait_queue_head_t *q, long timeout)
827 {
828         SLEEP_ON_VAR
829         
830         current->state = TASK_UNINTERRUPTIBLE;
831 
832         SLEEP_ON_HEAD
833         timeout = schedule_timeout(timeout);
834         SLEEP_ON_TAIL
835 
836         return timeout;
837 }
838 
/*
 * Empty function.  NOTE(review): by its name it marks the end of the
 * scheduling functions in the text section; confirm who relies on it.
 */
void scheduling_functions_end_here(void)
{
}
840 
841 #ifndef __alpha__
842 
843 /*
844  * This has been replaced by sys_setpriority.  Maybe it should be
845  * moved into the arch dependent tree for those ports that require
846  * it for backward compatibility?
847  */
848 
849 asmlinkage long sys_nice(int increment)
850 {
851         long newprio;
852 
853         /*
854          *      Setpriority might change our priority at the same moment.
855          *      We don't have to worry. Conceptually one call occurs first
856          *      and we have a single winner.
857          */
858         if (increment < 0) {
859                 if (!capable(CAP_SYS_NICE))
860                         return -EPERM;
861                 if (increment < -40)
862                         increment = -40;
863         }
864         if (increment > 40)
865                 increment = 40;
866 
867         newprio = current->nice + increment;
868         if (newprio < -20)
869                 newprio = -20;
870         if (newprio > 19)
871                 newprio = 19;
872         current->nice = newprio;
873         return 0;
874 }
875 
876 #endif
877 
878 static inline struct task_struct *find_process_by_pid(pid_t pid)
879 {
880         struct task_struct *tsk = current;
881 
882         if (pid)
883                 tsk = find_task_by_pid(pid);
884         return tsk;
885 }
886 
887 static int setscheduler(pid_t pid, int policy, 
888                         struct sched_param *param)
889 {
890         struct sched_param lp;
891         struct task_struct *p;
892         int retval;
893 
894         retval = -EINVAL;
895         if (!param || pid < 0)
896                 goto out_nounlock;
897 
898         retval = -EFAULT;
899         if (copy_from_user(&lp, param, sizeof(struct sched_param)))
900                 goto out_nounlock;
901 
902         /*
903          * We play safe to avoid deadlocks.
904          */
905         read_lock_irq(&tasklist_lock);
906         spin_lock(&runqueue_lock);
907 
908         p = find_process_by_pid(pid);
909 
910         retval = -ESRCH;
911         if (!p)
912                 goto out_unlock;
913                         
914         if (policy < 0)
915                 policy = p->policy;
916         else {
917                 retval = -EINVAL;
918                 if (policy != SCHED_FIFO && policy != SCHED_RR &&
919                                 policy != SCHED_OTHER)
920                         goto out_unlock;
921         }
922         
923         /*
924          * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
925          * priority for SCHED_OTHER is 0.
926          */
927         retval = -EINVAL;
928         if (lp.sched_priority < 0 || lp.sched_priority > 99)
929                 goto out_unlock;
930         if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
931                 goto out_unlock;
932 
933         retval = -EPERM;
934         if ((policy == SCHED_FIFO || policy == SCHED_RR) && 
935             !capable(CAP_SYS_NICE))
936                 goto out_unlock;
937         if ((current->euid != p->euid) && (current->euid != p->uid) &&
938             !capable(CAP_SYS_NICE))
939                 goto out_unlock;
940 
941         retval = 0;
942         p->policy = policy;
943         p->rt_priority = lp.sched_priority;
944         if (task_on_runqueue(p))
945                 move_first_runqueue(p);
946 
947         current->need_resched = 1;
948 
949 out_unlock:
950         spin_unlock(&runqueue_lock);
951         read_unlock_irq(&tasklist_lock);
952 
953 out_nounlock:
954         return retval;
955 }
956 
957 asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, 
958                                       struct sched_param *param)
959 {
960         return setscheduler(pid, policy, param);
961 }
962 
963 asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param)
964 {
965         return setscheduler(pid, -1, param);
966 }
967 
968 asmlinkage long sys_sched_getscheduler(pid_t pid)
969 {
970         struct task_struct *p;
971         int retval;
972 
973         retval = -EINVAL;
974         if (pid < 0)
975                 goto out_nounlock;
976 
977         retval = -ESRCH;
978         read_lock(&tasklist_lock);
979         p = find_process_by_pid(pid);
980         if (p)
981                 retval = p->policy & ~SCHED_YIELD;
982         read_unlock(&tasklist_lock);
983 
984 out_nounlock:
985         return retval;
986 }
987 
988 asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
989 {
990         struct task_struct *p;
991         struct sched_param lp;
992         int retval;
993 
994         retval = -EINVAL;
995         if (!param || pid < 0)
996                 goto out_nounlock;
997 
998         read_lock(&tasklist_lock);
999         p = find_process_by_pid(pid);
1000         retval = -ESRCH;
1001         if (!p)
1002                 goto out_unlock;
1003         lp.sched_priority = p->rt_priority;
1004         read_unlock(&tasklist_lock);
1005 
1006         /*
1007          * This one might sleep, we cannot do it with a spinlock held ...
1008          */
1009         retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
1010 
1011 out_nounlock:
1012         return retval;
1013 
1014 out_unlock:
1015         read_unlock(&tasklist_lock);
1016         return retval;
1017 }
1018 
1019 asmlinkage long sys_sched_yield(void)
1020 {
1021         /*
1022          * Trick. sched_yield() first counts the number of truly 
1023          * 'pending' runnable processes, then returns if it's
1024          * only the current processes. (This test does not have
1025          * to be atomic.) In threaded applications this optimization
1026          * gets triggered quite often.
1027          */
1028 
1029         int nr_pending = nr_running;
1030 
1031 #if CONFIG_SMP
1032         int i;
1033 
1034         // Substract non-idle processes running on other CPUs.
1035         for (i = 0; i < smp_num_cpus; i++)
1036                 if (aligned_data[i].schedule_data.curr != idle_task(i))
1037                         nr_pending--;
1038 #else
1039         // on UP this process is on the runqueue as well
1040         nr_pending--;
1041 #endif
1042         if (nr_pending) {
1043                 /*
1044                  * This process can only be rescheduled by us,
1045                  * so this is safe without any locking.
1046                  */
1047                 if (current->policy == SCHED_OTHER)
1048                         current->policy |= SCHED_YIELD;
1049                 current->need_resched = 1;
1050         }
1051         return 0;
1052 }
1053 
1054 asmlinkage long sys_sched_get_priority_max(int policy)
1055 {
1056         int ret = -EINVAL;
1057 
1058         switch (policy) {
1059         case SCHED_FIFO:
1060         case SCHED_RR:
1061                 ret = 99;
1062                 break;
1063         case SCHED_OTHER:
1064                 ret = 0;
1065                 break;
1066         }
1067         return ret;
1068 }
1069 
1070 asmlinkage long sys_sched_get_priority_min(int policy)
1071 {
1072         int ret = -EINVAL;
1073 
1074         switch (policy) {
1075         case SCHED_FIFO:
1076         case SCHED_RR:
1077                 ret = 1;
1078                 break;
1079         case SCHED_OTHER:
1080                 ret = 0;
1081         }
1082         return ret;
1083 }
1084 
1085 asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
1086 {
1087         struct timespec t;
1088         struct task_struct *p;
1089         int retval = -EINVAL;
1090 
1091         if (pid < 0)
1092                 goto out_nounlock;
1093 
1094         retval = -ESRCH;
1095         read_lock(&tasklist_lock);
1096         p = find_process_by_pid(pid);
1097         if (p)
1098                 jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
1099                                     &t);
1100         read_unlock(&tasklist_lock);
1101         if (p)
1102                 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
1103 out_nounlock:
1104         return retval;
1105 }
1106 
1107 static void show_task(struct task_struct * p)
1108 {
1109         unsigned long free = 0;
1110         int state;
1111         static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
1112 
1113         printk("%-8s  ", p->comm);
1114         state = p->state ? ffz(~p->state) + 1 : 0;
1115         if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
1116                 printk(stat_nam[state]);
1117         else
1118                 printk(" ");
1119 #if (BITS_PER_LONG == 32)
1120         if (p == current)
1121                 printk(" current  ");
1122         else
1123                 printk(" %08lX ", thread_saved_pc(&p->thread));
1124 #else
1125         if (p == current)
1126                 printk("   current task   ");
1127         else
1128                 printk(" %016lx ", thread_saved_pc(&p->thread));
1129 #endif
1130         {
1131                 unsigned long * n = (unsigned long *) (p+1);
1132                 while (!*n)
1133                         n++;
1134                 free = (unsigned long) n - (unsigned long)(p+1);
1135         }
1136         printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid);
1137         if (p->p_cptr)
1138                 printk("%5d ", p->p_cptr->pid);
1139         else
1140                 printk("      ");
1141         if (!p->mm)
1142                 printk(" (L-TLB) ");
1143         else
1144                 printk(" (NOTLB) ");
1145         if (p->p_ysptr)
1146                 printk("%7d", p->p_ysptr->pid);
1147         else
1148                 printk("       ");
1149         if (p->p_osptr)
1150                 printk(" %5d\n", p->p_osptr->pid);
1151         else
1152                 printk("\n");
1153 
1154 #ifdef CONFIG_X86
1155 /* This is very useful, but only works on x86 right now */
1156         {
1157                 extern void show_trace(unsigned long);
1158                 show_trace(p->thread.esp);
1159         }
1160 #endif
1161 }
1162 
1163 char * render_sigset_t(sigset_t *set, char *buffer)
1164 {
1165         int i = _NSIG, x;
1166         do {
1167                 i -= 4, x = 0;
1168                 if (sigismember(set, i+1)) x |= 1;
1169                 if (sigismember(set, i+2)) x |= 2;
1170                 if (sigismember(set, i+3)) x |= 4;
1171                 if (sigismember(set, i+4)) x |= 8;
1172                 *buffer++ = (x < 10 ? '' : 'a' - 10) + x;
1173         } while (i >= 4);
1174         *buffer = 0;
1175         return buffer;
1176 }
1177 
1178 void show_state(void)
1179 {
1180         struct task_struct *p;
1181 
1182 #if (BITS_PER_LONG == 32)
1183         printk("\n"
1184                "                         free                        sibling\n");
1185         printk("  task             PC    stack   pid father child younger older\n");
1186 #else
1187         printk("\n"
1188                "                                 free                        sibling\n");
1189         printk("  task                 PC        stack   pid father child younger older\n");
1190 #endif
1191         read_lock(&tasklist_lock);
1192         for_each_task(p)
1193                 show_task(p);
1194         read_unlock(&tasklist_lock);
1195 }
1196 
1197 /*
1198  *      Put all the gunge required to become a kernel thread without
1199  *      attached user resources in one place where it belongs.
1200  */
1201 
1202 void daemonize(void)
1203 {
1204         struct fs_struct *fs;
1205 
1206 
1207         /*
1208          * If we were started as result of loading a module, close all of the
1209          * user space pages.  We don't need them, and if we didn't close them
1210          * they would be locked into memory.
1211          */
1212         exit_mm(current);
1213 
1214         current->session = 1;
1215         current->pgrp = 1;
1216 
1217         /* Become as one with the init task */
1218 
1219         exit_fs(current);       /* current->fs->count--; */
1220         fs = init_task.fs;
1221         current->fs = fs;
1222         atomic_inc(&fs->count);
1223         exit_files(current);
1224         current->files = init_task.files;
1225         atomic_inc(&current->files->count);
1226 }
1227 
1228 void __init init_idle(void)
1229 {
1230         struct schedule_data * sched_data;
1231         sched_data = &aligned_data[smp_processor_id()].schedule_data;
1232 
1233         if (current != &init_task && task_on_runqueue(current)) {
1234                 printk("UGH! (%d:%d) was on the runqueue, removing.\n",
1235                         smp_processor_id(), current->pid);
1236                 del_from_runqueue(current);
1237         }
1238         sched_data->curr = current;
1239         sched_data->last_schedule = get_cycles();
1240 }
1241 
1242 extern void init_timervecs (void);
1243 
1244 void __init sched_init(void)
1245 {
1246         /*
1247          * We have to do a little magic to get the first
1248          * process right in SMP mode.
1249          */
1250         int cpu = smp_processor_id();
1251         int nr;
1252 
1253         init_task.processor = cpu;
1254 
1255         for(nr = 0; nr < PIDHASH_SZ; nr++)
1256                 pidhash[nr] = NULL;
1257 
1258         init_timervecs();
1259 
1260         init_bh(TIMER_BH, timer_bh);
1261         init_bh(TQUEUE_BH, tqueue_bh);
1262         init_bh(IMMEDIATE_BH, immediate_bh);
1263 
1264         /*
1265          * The boot idle thread does lazy MMU switching as well:
1266          */
1267         atomic_inc(&init_mm.mm_count);
1268         enter_lazy_tlb(&init_mm, current, cpu);
1269 }
1270 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.