Permallocate scheduler cells in chunks

Scheduler cells are small objects that come and go in great quantities.
Even though they are allocated and deallocated a lot, their population
tends toward a steady-state quantity, so put together a heap that is
unique to each cell type.

This heap actually saves memory overall because cells are allocated in
chunks, which eliminates per-allocation allocator overhead, and cells
are pushed to and pulled from a heap very quickly, so the overhead that
remains is slight and bounded.
This commit is contained in:
Stephen Williams 2008-06-12 19:55:53 -07:00
parent 3c4346acb2
commit 4af4c8cca9
4 changed files with 231 additions and 33 deletions

View File

@ -303,13 +303,21 @@ int main(int argc, char*argv[])
vpi_mcd_printf(1, "Event counts:\n");
vpi_mcd_printf(1, " %8lu time steps (pool=%lu)\n",
count_time_events, count_time_pool);
count_time_events, count_time_pool());
vpi_mcd_printf(1, " %8lu thread schedule events\n",
count_thread_events);
vpi_mcd_printf(1, " %8lu assign events\n",
count_assign_events);
vpi_mcd_printf(1, " %8lu other events\n",
count_gen_events);
vpi_mcd_printf(1, " ...assign(vec4) pool=%lu\n",
count_assign4_pool());
vpi_mcd_printf(1, " ...assign(vec8) pool=%lu\n",
count_assign8_pool());
vpi_mcd_printf(1, " ...assign(real) pool=%lu\n",
count_assign_real_pool());
vpi_mcd_printf(1, " ...assign(word) pool=%lu\n",
count_assign_aword_pool());
vpi_mcd_printf(1, " %8lu other events (pool=%lu)\n",
count_gen_events, count_gen_pool());
}
return vvp_return_value;

View File

@ -19,9 +19,8 @@
# include "schedule.h"
# include "vthread.h"
#ifdef HAVE_MALLOC_H
# include <malloc.h>
#endif
# include "slab.h"
# include <new>
# include <signal.h>
# include <stdlib.h>
# include <assert.h>
@ -33,7 +32,7 @@ unsigned long count_gen_events = 0;
unsigned long count_thread_events = 0;
// Count the time events (A time cell created)
unsigned long count_time_events = 0;
unsigned long count_time_pool = 0;
/*
@ -50,9 +49,14 @@ struct event_s {
struct event_s*next;
virtual ~event_s() { }
virtual void run_run(void) =0;
// Fallback new/delete
static void*operator new (size_t size) { return ::new char[size]; }
static void operator delete(void*ptr) { ::delete[]( (char*)ptr ); }
};
struct event_time_s {
event_time_s() { count_time_events += 1; }
vvp_time64_t delay;
struct event_s*active;
@ -63,8 +67,8 @@ struct event_time_s {
struct event_time_s*next;
void* operator new (size_t);
void operator delete(void*obj, size_t s);
static void* operator new (size_t);
static void operator delete(void*obj, size_t s);
};
vvp_gen_event_s::~vvp_gen_event_s()
@ -77,6 +81,9 @@ vvp_gen_event_s::~vvp_gen_event_s()
// Scheduler cell that resumes a suspended thread when it is executed.
struct vthread_event_s : public event_s {
// Thread to resume. NOTE(review): vthread_t comes from vthread.h --
// presumably an opaque thread handle; confirm against that header.
vthread_t thr;
void run_run(void);
// Class-specific allocator: cells come from a dedicated slab heap
// (see the operator definitions that follow) instead of malloc.
static void* operator new(size_t);
static void operator delete(void*);
};
void vthread_event_s::run_run(void)
@ -85,6 +92,20 @@ void vthread_event_s::run_run(void)
vthread_run(thr);
}
// Slab heap for vthread_event_s cells; chunks are sized to fill
// roughly 8K bytes each.
static const size_t VTHR_CHUNK_COUNT = 8192 / sizeof(struct vthread_event_s);
static slab_t<sizeof(vthread_event_s),VTHR_CHUNK_COUNT> vthread_event_heap;

// Draw thread-schedule cells from the dedicated slab heap.
inline void* vthread_event_s::operator new(size_t sz)
{
      assert(sz == sizeof(vthread_event_s));
      void*cell = vthread_event_heap.alloc_slab();
      return cell;
}

// Cells go back to the slab heap for reuse; they are never returned
// to the system allocator.
void vthread_event_s::operator delete(void*obj)
{
      vthread_event_heap.free_slab(obj);
}
struct del_thr_event_s : public event_s {
vthread_t thr;
void run_run(void);
@ -111,6 +132,9 @@ struct assign_vector4_event_s : public event_s {
/* Width of the destination vector. */
unsigned vwid;
void run_run(void);
static void* operator new(size_t);
static void operator delete(void*);
};
void assign_vector4_event_s::run_run(void)
@ -122,10 +146,29 @@ void assign_vector4_event_s::run_run(void)
vvp_send_vec4(ptr, val);
}
// Slab heap for vector4 assign cells. These are by far the most
// common event, so size the chunk to fill roughly 512K bytes.
static const size_t ASSIGN4_CHUNK_COUNT = 524288 / sizeof(struct assign_vector4_event_s);
static slab_t<sizeof(assign_vector4_event_s),ASSIGN4_CHUNK_COUNT> assign4_heap;

// Draw vector4 assign cells from the dedicated slab heap.
inline void* assign_vector4_event_s::operator new(size_t sz)
{
      assert(sz == sizeof(assign_vector4_event_s));
      void*cell = assign4_heap.alloc_slab();
      return cell;
}

// Return the cell to the slab heap for reuse.
void assign_vector4_event_s::operator delete(void*obj)
{
      assign4_heap.free_slab(obj);
}

// Report the total number of vector4 assign cells ever carved out.
unsigned long count_assign4_pool(void)
{
      return assign4_heap.pool;
}
// Scheduler cell that delivers a vvp_vector8_t value to a net.
struct assign_vector8_event_s : public event_s {
// Destination net pointer.
vvp_net_ptr_t ptr;
// Value to deliver when the event runs.
vvp_vector8_t val;
void run_run(void);
// Class-specific allocator backed by a dedicated slab heap.
static void* operator new(size_t);
static void operator delete(void*);
};
void assign_vector8_event_s::run_run(void)
@ -134,10 +177,29 @@ void assign_vector8_event_s::run_run(void)
vvp_send_vec8(ptr, val);
}
// Slab heap for vector8 assign cells; chunks fill roughly 8K bytes.
static const size_t ASSIGN8_CHUNK_COUNT = 8192 / sizeof(struct assign_vector8_event_s);
static slab_t<sizeof(assign_vector8_event_s),ASSIGN8_CHUNK_COUNT> assign8_heap;

// Draw vector8 assign cells from the dedicated slab heap.
inline void* assign_vector8_event_s::operator new(size_t sz)
{
      assert(sz == sizeof(assign_vector8_event_s));
      void*cell = assign8_heap.alloc_slab();
      return cell;
}

// Return the cell to the slab heap for reuse.
void assign_vector8_event_s::operator delete(void*obj)
{
      assign8_heap.free_slab(obj);
}

// Report the total number of vector8 assign cells ever carved out.
unsigned long count_assign8_pool()
{
      return assign8_heap.pool;
}
// Scheduler cell that delivers a real (double) value to a net.
struct assign_real_event_s : public event_s {
// Destination net pointer.
vvp_net_ptr_t ptr;
// Value to deliver when the event runs.
double val;
void run_run(void);
// Class-specific allocator backed by a dedicated slab heap.
static void* operator new(size_t);
static void operator delete(void*);
};
void assign_real_event_s::run_run(void)
@ -146,12 +208,31 @@ void assign_real_event_s::run_run(void)
vvp_send_real(ptr, val);
}
// Slab heap for real-value assign cells; chunks fill roughly 8K bytes.
static const size_t ASSIGNR_CHUNK_COUNT = 8192 / sizeof(struct assign_real_event_s);
static slab_t<sizeof(assign_real_event_s),ASSIGNR_CHUNK_COUNT> assignr_heap;

// Draw real-value assign cells from the dedicated slab heap.
inline void* assign_real_event_s::operator new (size_t sz)
{
      assert(sz == sizeof(assign_real_event_s));
      void*cell = assignr_heap.alloc_slab();
      return cell;
}

// Return the cell to the slab heap for reuse.
void assign_real_event_s::operator delete(void*obj)
{
      assignr_heap.free_slab(obj);
}

// Report the total number of real-value assign cells ever carved out.
unsigned long count_assign_real_pool(void)
{
      return assignr_heap.pool;
}
// Scheduler cell that writes a vector4 value into one word of an array.
struct assign_array_word_s : public event_s {
// Destination array. NOTE(review): vvp_array_t is declared elsewhere --
// presumably an opaque array handle; confirm against array.h.
vvp_array_t mem;
// Word address within the array.
unsigned adr;
// Value to write when the event runs.
vvp_vector4_t val;
// Bit offset within the addressed word.
unsigned off;
void run_run(void);
// Class-specific allocator backed by a dedicated slab heap.
static void* operator new(size_t);
static void operator delete(void*);
};
void assign_array_word_s::run_run(void)
@ -160,10 +241,29 @@ void assign_array_word_s::run_run(void)
array_set_word(mem, adr, off, val);
}
// Slab heap for array-word assign cells; chunks fill roughly 8K bytes.
static const size_t ARRAY_W_CHUNK_COUNT = 8192 / sizeof(struct assign_array_word_s);
static slab_t<sizeof(assign_array_word_s),ARRAY_W_CHUNK_COUNT> array_w_heap;

// Draw array-word assign cells from the dedicated slab heap.
inline void* assign_array_word_s::operator new (size_t sz)
{
      assert(sz == sizeof(assign_array_word_s));
      void*cell = array_w_heap.alloc_slab();
      return cell;
}

// Return the cell to the slab heap for reuse.
void assign_array_word_s::operator delete(void*obj)
{
      array_w_heap.free_slab(obj);
}

// Report the total number of array-word assign cells ever carved out.
unsigned long count_assign_aword_pool(void)
{
      return array_w_heap.pool;
}
// Scheduler cell for generic callback events: dispatches to the
// run_run method of a vvp_gen_event_t object.
struct generic_event_s : public event_s {
// Callback object to dispatch to.
vvp_gen_event_t obj;
// Value carried along with the callback. NOTE(review): its meaning is
// defined by the obj callback; not visible from this code.
unsigned char val;
void run_run(void);
// Class-specific allocator backed by a dedicated slab heap.
static void* operator new(size_t);
static void operator delete(void*);
};
void generic_event_s::run_run(void)
@ -173,44 +273,45 @@ void generic_event_s::run_run(void)
obj->run_run();
}
// Slab heap for generic callback cells; chunks fill roughly 128K bytes.
static const size_t GENERIC_CHUNK_COUNT = 131072 / sizeof(struct generic_event_s);
static slab_t<sizeof(generic_event_s),GENERIC_CHUNK_COUNT> generic_event_heap;

// Draw generic callback cells from the dedicated slab heap.
inline void* generic_event_s::operator new(size_t sz)
{
      assert(sz == sizeof(generic_event_s));
      void*cell = generic_event_heap.alloc_slab();
      return cell;
}

// Return the cell to the slab heap for reuse.
void generic_event_s::operator delete(void*obj)
{
      generic_event_heap.free_slab(obj);
}

// Report the total number of generic callback cells ever carved out.
unsigned long count_gen_pool(void)
{
      return generic_event_heap.pool;
}
/*
** These event_time_s will be required a lot, at high frequency.
** Once allocated, we never free them, but stash them away for next time.
*/
static struct event_time_s* time_free_list = 0;
static const unsigned TIME_CHUNK_COUNT = 8192 / sizeof(struct event_time_s);
static const size_t TIME_CHUNK_COUNT = 8192 / sizeof(struct event_time_s);
static slab_t<sizeof(event_time_s),TIME_CHUNK_COUNT> event_time_heap;
inline void* event_time_s::operator new (size_t size)
{
assert(size == sizeof(struct event_time_s));
struct event_time_s* cur = time_free_list;
if (!cur) {
cur = (struct event_time_s*)
malloc(TIME_CHUNK_COUNT * sizeof(struct event_time_s));
for (unsigned idx = 1 ; idx < TIME_CHUNK_COUNT ; idx += 1) {
cur[idx].next = time_free_list;
time_free_list = cur + idx;
}
count_time_pool += TIME_CHUNK_COUNT;
} else {
time_free_list = cur->next;
}
return cur;
void*ptr = event_time_heap.alloc_slab();
return ptr;
}
inline void event_time_s::operator delete(void*obj, size_t size)
inline void event_time_s::operator delete(void*ptr, size_t size)
{
struct event_time_s*cur = reinterpret_cast<event_time_s*>(obj);
cur->next = time_free_list;
time_free_list = cur;
event_time_heap.free_slab(ptr);
}
// Report how many event_time_s cells have been carved out of the slab
// heap so far (both free and in use).
unsigned long count_time_pool(void) { return event_time_heap.pool; }
/*
* This is the head of the list of pending events. This includes all
* the events that have not been executed yet, and reaches into the

80
vvp/slab.h Normal file
View File

@ -0,0 +1,80 @@
#ifndef __slab_H
#define __slab_H
/*
* Copyright (c) 2008 Picture Elements, Inc.
* Stephen Williams (steve@icarus.com)
*
* This source code is free software; you can redistribute it
* and/or modify it in source code form under the terms of the GNU
* General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <cstddef>

/*
 * A slab_t is a permallocation heap for fixed-size cells. Cells are
 * carved out of chunks of CHUNK_COUNT cells each, threaded onto a free
 * list, and never returned to the system allocator; SLAB_SIZE is the
 * size in bytes of one cell. The first chunk is embedded in the object
 * itself so a freshly constructed heap can hand out cells with no
 * dynamic allocation at all.
 */
template <size_t SLAB_SIZE, size_t CHUNK_COUNT> class slab_t {

	// A cell doubles as a free-list link while it sits on the heap.
      union item_cell_u {
	    item_cell_u*next;
	    char space[SLAB_SIZE];
      };

    public:
      slab_t();

	// Pop one cell off the free list, growing the pool by another
	// chunk if the list is empty.
      void* alloc_slab();
	// Push a cell back onto the free list. A null pointer is
	// ignored, since a class-specific operator delete may be
	// reached with null (the language leaves that unspecified).
      void free_slab(void*);

	// Total count of cells ever carved out, free or in use.
      unsigned long pool;

    private:
	// Head of the free list.
      item_cell_u*heap_;
	// Built-in first chunk; avoids any allocation until the heap
	// grows past CHUNK_COUNT cells.
      item_cell_u initial_chunk_[CHUNK_COUNT];
};

template <size_t SLAB_SIZE, size_t CHUNK_COUNT>
slab_t<SLAB_SIZE,CHUNK_COUNT>::slab_t()
{
      pool = CHUNK_COUNT;
	// Thread the embedded chunk into the initial free list.
      heap_ = initial_chunk_;
      for (unsigned idx = 0 ; idx < CHUNK_COUNT-1 ; idx += 1)
	    initial_chunk_[idx].next = initial_chunk_+idx+1;
      initial_chunk_[CHUNK_COUNT-1].next = 0;
}

template <size_t SLAB_SIZE, size_t CHUNK_COUNT>
inline void* slab_t<SLAB_SIZE,CHUNK_COUNT>::alloc_slab()
{
	// Free list exhausted: carve a new chunk and push every cell.
      if (heap_ == 0) {
	    item_cell_u*chunk = new item_cell_u[CHUNK_COUNT];
	    for (unsigned idx = 0 ; idx < CHUNK_COUNT ; idx += 1) {
		  chunk[idx].next = heap_;
		  heap_ = chunk+idx;
	    }
	    pool += CHUNK_COUNT;
      }

      item_cell_u*cur = heap_;
      heap_ = heap_->next;
      return cur;
}

template <size_t SLAB_SIZE, size_t CHUNK_COUNT>
inline void slab_t<SLAB_SIZE,CHUNK_COUNT>::free_slab(void*ptr)
{
	// Tolerate null: deleting a null pointer may still invoke the
	// deallocation function on some implementations.
      if (ptr == 0) return;

      item_cell_u*cur = reinterpret_cast<item_cell_u*> (ptr);
      cur->next = heap_;
      heap_ = cur;
}
#endif

View File

@ -33,7 +33,16 @@ extern unsigned long count_vpi_scopes;
extern unsigned long count_vpi_memories;
extern unsigned long count_time_events;
extern unsigned long count_time_pool;
extern unsigned long count_time_pool(void);
extern unsigned long count_assign_events;
extern unsigned long count_assign4_pool(void);
extern unsigned long count_assign8_pool(void);
extern unsigned long count_assign_real_pool(void);
extern unsigned long count_assign_aword_pool(void);
extern unsigned long count_gen_events;
extern unsigned long count_gen_pool(void);
extern size_t size_opcodes;
extern size_t size_vvp_nets;