diff --git a/malloc/malloc.c b/malloc/malloc.c
index c0423a2e10..54769cbbd6 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -292,13 +292,19 @@
 #if USE_TCACHE
 /* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
 # define TCACHE_MAX_BINS		64
+/* Number of bins beyond the TCACHE_MAX_BINS reserved for ranges of chunksizes
+   to allow tcaches to cache chunks beyond ~1k size.  */
+# define TCACHE_UNBOUND_SIZE_BINS	10
+# define TCACHE_ALL_BINS	(TCACHE_MAX_BINS + TCACHE_UNBOUND_SIZE_BINS)
 # define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+# define TCACHE_FIXED_SIZE_BINS	\
+	   (mp_.tcache_bins < TCACHE_MAX_BINS ? mp_.tcache_bins : TCACHE_MAX_BINS)
 
 /* Only used to pre-fill the tunables.  */
-# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - MALLOC_ALIGNMENT + 1)
 
-/* When "x" is from chunksize().  */
-# define csize2tidx(x) (((x) - MINSIZE) / MALLOC_ALIGNMENT)
+/* When "x" is from chunksize(). Only usable for single sized tcache bins.  */
+# define fast_csize2tidx(x) (((x) - MINSIZE) / MALLOC_ALIGNMENT)
 /* When "x" is a user-provided size.  */
 # define usize2tidx(x) csize2tidx (request2size (x))
 
@@ -1929,7 +1935,7 @@ static struct malloc_par mp_ =
   ,
   .tcache_count = TCACHE_FILL_COUNT,
   .tcache_bins = TCACHE_MAX_BINS,
-  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_max_bytes = MAX_TCACHE_SIZE,
   .tcache_unsorted_limit = 0 /* No limit.  */
 #endif
 };
@@ -3122,8 +3128,8 @@ typedef struct tcache_entry
    time), this is for performance reasons.  */
 typedef struct tcache_perthread_struct
 {
-  uint16_t counts[TCACHE_MAX_BINS];
-  tcache_entry *entries[TCACHE_MAX_BINS];
+  uint16_t counts[TCACHE_MAX_BINS + TCACHE_UNBOUND_SIZE_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS + TCACHE_UNBOUND_SIZE_BINS];
 } tcache_perthread_struct;
 
 static __thread bool tcache_shutting_down = false;
@@ -3156,6 +3162,45 @@ tcache_key_initialize (void)
     }
 }
 
+static __always_inline size_t
+csize2tidx(size_t nb)
+{
+  if (__glibc_likely (nb < tidx2usize (TCACHE_MAX_BINS)))
+    return fast_csize2tidx(nb);
+  else if (__glibc_likely (mp_.tcache_bins < TCACHE_MAX_BINS))
+    return TCACHE_MAX_BINS;
+  else
+    {
+      size_t idx = TCACHE_MAX_BINS
+	      + __builtin_clz (tidx2usize (TCACHE_MAX_BINS-1))
+	      - __builtin_clz (nb);
+      return idx < TCACHE_ALL_BINS ? idx : TCACHE_ALL_BINS - 1;
+    }
+}
+
+/* Returns the position in the tcache bin where the specified size is first found in.
+   This function is used by both tcache_get and tcache_put to locate the proper
+   position in the tcache bin linked-list.  */
+static __always_inline tcache_entry **
+tcache_location_for_size (size_t nb, size_t tc_idx)
+{
+  if (__glibc_likely (nb <= tidx2usize (TCACHE_MAX_BINS-1)))
+    return &(tcache->entries[tc_idx]);
+  else
+    {
+      tcache_entry **tep = &(tcache->entries[tc_idx]);
+      tcache_entry *te = REVEAL_PTR (*tep);
+      while (te != NULL
+             && __glibc_unlikely (chunksize (mem2chunk (te)) < nb))
+        {
+          tep = & (te->next);
+          te = REVEAL_PTR (te->next);
+        }
+
+      return tep;
+    }
+}
+
 /* Caller must ensure that we know tc_idx is valid and there's room
    for more chunks.  */
 static __always_inline void
@@ -3167,8 +3212,9 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
      detect a double free.  */
   e->key = tcache_key;
 
-  e->next = PROTECT_PTR (&e->next, REVEAL_PTR (tcache->entries[tc_idx]));
-  tcache->entries[tc_idx] = PROTECT_PTR (&(tcache->entries[tc_idx]), e);
+  tcache_entry **entry = tcache_location_for_size (chunksize (chunk), tc_idx);
+  e->next = PROTECT_PTR (&e->next, REVEAL_PTR (*entry));
+  *entry = PROTECT_PTR (entry, e);
   ++(tcache->counts[tc_idx]);
 }
 
@@ -3176,7 +3222,7 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
    available chunks to remove.  Removes chunk from the middle of the
    list.  */
 static __always_inline void *
-tcache_get_n (size_t tc_idx, tcache_entry **ep)
+tcache_remove_entry (size_t tc_idx, tcache_entry **ep)
 {
   tcache_entry *e = REVEAL_PTR (*ep);
 
@@ -3190,18 +3236,18 @@ tcache_get_n (size_t tc_idx, tcache_entry **ep)
   return (void *) e;
 }
 
-/* Like the above, but removes from the head of the list.  */
+/* Like the above, but traverses the list to find proper location to get the
+   entry within the respective tcache bin.  */
 static __always_inline void *
-tcache_get (size_t tc_idx)
+tcache_get (size_t nb, size_t tc_idx)
 {
-  return tcache_get_n (tc_idx, & tcache->entries[tc_idx]);
-}
+  tcache_entry **entry = tcache_location_for_size (nb, tc_idx);
+  tcache_entry *e = REVEAL_PTR (*entry);
+  if (__glibc_unlikely (tc_idx >= TCACHE_MAX_BINS)
+      && (e == NULL || chunksize (mem2chunk (e)) != nb))
+    return NULL;
 
-/* Iterates through the tcache linked list.  */
-static __always_inline tcache_entry *
-tcache_next (tcache_entry *e)
-{
-  return (tcache_entry *) REVEAL_PTR (e->next);
+  return tcache_remove_entry (tc_idx, entry);
 }
 
 /* Check if tcache is available for alloc by corresponding tc_idx.  */
@@ -3258,7 +3304,7 @@ tcache_thread_shutdown (void)
 
   /* Free all of the entries and the tcache itself back to the arena
      heap for coalescing.  */
-  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+  for (i = 0; i < TCACHE_MAX_BINS + TCACHE_UNBOUND_SIZE_BINS; ++i)
     {
       while (REVEAL_PTR (tcache_tmp->entries[i]))
 	{
@@ -3317,7 +3363,7 @@ tcache_init(void)
       memset (tcache, 0, sizeof (tcache_perthread_struct));
     }
 
-  for (int i = 0; i < mp_.tcache_bins; i++)
+  for (int i = 0; i < TCACHE_MAX_BINS + TCACHE_UNBOUND_SIZE_BINS; i++)
     tcache->entries[i] = PROTECT_PTR (&(tcache->entries[i]), NULL);
 }
 
@@ -3344,7 +3390,7 @@ tcache_try_malloc (size_t bytes, void **memptr)
   MAYBE_INIT_TCACHE ();
 
   if (tcache_available (tc_idx))
-    *memptr = tcache_get (tc_idx);
+    *memptr = tcache_get (tbytes, tc_idx);
   else
     *memptr = NULL;
 
@@ -3373,8 +3419,20 @@ __libc_malloc2 (size_t bytes)
   if (!__malloc_initialized)
     ptmalloc_init ();
 
+#if USE_TCACHE
   MAYBE_INIT_TCACHE ();
 
+  size_t nb = checked_request2size (bytes);
+  size_t tc_idx = csize2tidx (nb);
+
+  if (tcache_available (tc_idx))
+    {
+      void *tmp = tcache_get (nb, tc_idx);
+      if (tmp != NULL)
+        return tag_new_usable (tmp);
+    }
+#endif
+
   if (SINGLE_THREAD_P)
     {
       victim = tag_new_usable (_int_malloc (&main_arena, bytes));
@@ -3409,10 +3467,17 @@ void *
 __libc_malloc (size_t bytes)
 {
 #if USE_TCACHE
-  size_t tc_idx = csize2tidx (checked_request2size (bytes));
+  size_t nb = checked_request2size (bytes);
+  size_t tc_idx = fast_csize2tidx (nb);
 
-  if (tcache_available (tc_idx))
-    return tag_new_usable (tcache_get (tc_idx));
+  /* Limit this call to chunks that fit in same dimention tcache bins.
+     Large chunk sizes are cached inside __libc_malloc2.  */
+  if (tc_idx < TCACHE_MAX_BINS
+      && tcache_available (tc_idx))
+    {
+      tcache_entry **entry = &tcache->entries[tc_idx];
+      return tag_new_usable (tcache_remove_entry (tc_idx, entry));
+    }
 #endif
 
   return __libc_malloc2 (bytes);
@@ -3683,20 +3748,28 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
 
     if (tcache_available (tc_idx))
       {
-	/* The tcache itself isn't encoded, but the chain is.  */
-	tcache_entry **tep = & tcache->entries[tc_idx];
+	tcache_entry **tep = tcache_location_for_size (tbytes, tc_idx);
 	tcache_entry *te = REVEAL_PTR (*tep);
+
+	/* Make sure returned chunk is of expected size.  */
+	if (te == NULL
+	    || (tc_idx >= TCACHE_MAX_BINS
+		&& chunksize (mem2chunk (te)) != tbytes))
+	  goto tcache_memalign_abort;
+
+	/* The tcache itself isn't encoded, but the chain is.  */
 	while (te != NULL && !PTR_IS_ALIGNED (te, alignment))
 	  {
 	    tep = & (te->next);
-	    te = tcache_next (te);
+	    te = REVEAL_PTR (te->next);
 	  }
 	if (te != NULL)
 	  {
-	    void *victim = tcache_get_n (tc_idx, tep);
+	    void *victim = tcache_remove_entry (tc_idx, tep);
 	    return tag_new_usable (victim);
 	  }
       }
+tcache_memalign_abort:
   }
 #endif
 
@@ -3983,8 +4056,8 @@ _int_malloc (mstate av, size_t bytes)
 #if USE_TCACHE
 	      /* While we're here, if we see other chunks of the same size,
 		 stash them in the tcache.  */
-	      size_t tc_idx = csize2tidx (nb);
-	      if (tcache != NULL && tc_idx < mp_.tcache_bins)
+	      size_t tc_idx = fast_csize2tidx (nb);
+	      if (tcache != NULL && tc_idx < TCACHE_FIXED_SIZE_BINS)
 		{
 		  mchunkptr tc_victim;
 
@@ -4044,8 +4117,8 @@ _int_malloc (mstate av, size_t bytes)
 #if USE_TCACHE
 	  /* While we're here, if we see other chunks of the same size,
 	     stash them in the tcache.  */
-	  size_t tc_idx = csize2tidx (nb);
-	  if (tcache != NULL && tc_idx < mp_.tcache_bins)
+	  size_t tc_idx = fast_csize2tidx (nb);
+	  if (__glibc_likely (tcache != NULL) && tc_idx < TCACHE_FIXED_SIZE_BINS)
 	    {
 	      mchunkptr tc_victim;
 
@@ -4106,8 +4179,8 @@ _int_malloc (mstate av, size_t bytes)
 
 #if USE_TCACHE
   INTERNAL_SIZE_T tcache_nb = 0;
-  size_t tc_idx = csize2tidx (nb);
-  if (tcache != NULL && tc_idx < mp_.tcache_bins)
+  size_t tc_idx = fast_csize2tidx (nb);
+  if (tcache != NULL && tc_idx < TCACHE_FIXED_SIZE_BINS)
     tcache_nb = nb;
   int return_cached = 0;
 
@@ -4285,7 +4358,7 @@ _int_malloc (mstate av, size_t bytes)
 	  && mp_.tcache_unsorted_limit > 0
 	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
 	{
-	  return tcache_get (tc_idx);
+	  return tcache_get (nb, tc_idx);
 	}
 #endif
 
@@ -4298,7 +4371,7 @@ _int_malloc (mstate av, size_t bytes)
       /* If all the small chunks we found ended up cached, return one now.  */
       if (return_cached)
 	{
-	  return tcache_get (tc_idx);
+	  return tcache_get (nb, tc_idx);
 	}
 #endif
 
@@ -5530,14 +5603,12 @@ do_set_arena_max (size_t value)
 static __always_inline int
 do_set_tcache_max (size_t value)
 {
-  if (value <= MAX_TCACHE_SIZE)
-    {
-      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
-      mp_.tcache_max_bytes = value;
-      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
-      return 1;
-    }
-  return 0;
+  LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+  mp_.tcache_max_bytes = value;
+  mp_.tcache_bins = TCACHE_MAX_BINS + TCACHE_UNBOUND_SIZE_BINS;
+  mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+
+  return 1;
 }
 
 static __always_inline int
