diff --git a/AUTHORS.md b/AUTHORS.md
index 832d233..89a716e 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -2,4 +2,5 @@
 - [Dominik Honnef](https://github.com/dominikh) dominik@honnef.co
 - [Dong-hee Na](https://github.com/corona10/) donghee.na92@gmail.com
 - [Gustavo Brunoro](https://github.com/brunoro/) git@hitnail.net
-- [Alex Higashino](https://github.com/TokyoWolFrog/) TokyoWolFrog@mayxyou.com
\ No newline at end of file
+- [Alex Higashino](https://github.com/TokyoWolFrog/) TokyoWolFrog@mayxyou.com
+- [Evan Oberholster](https://github.com/evanoberholster/) eroberholster@gmail.com
\ No newline at end of file
diff --git a/etcs/utils.go b/etcs/utils.go
index 795c75f..1d1cdcd 100644
--- a/etcs/utils.go
+++ b/etcs/utils.go
@@ -30,6 +30,16 @@ func MedianOfPixels(pixels []float64) float64 {
 	return v
 }
 
+// MedianOfPixelsFast64 function returns a median value of pixels.
+// It uses quick selection algorithm.
+func MedianOfPixelsFast64(pixels []float64) float64 {
+	tmp := [64]float64{}
+	copy(tmp[:], pixels)
+	l := len(tmp)
+	pos := l / 2
+	return quickSelectMedian(tmp[:], 0, l-1, pos)
+}
+
 func quickSelectMedian(sequence []float64, low int, hi int, k int) float64 {
 	if low == hi {
 		return sequence[k]
diff --git a/hashcompute.go b/hashcompute.go
index 9b1fcbf..8a14816 100644
--- a/hashcompute.go
+++ b/hashcompute.go
@@ -7,6 +7,7 @@ package goimagehash
 import (
 	"errors"
 	"image"
+	"sync"
 
 	"github.com/corona10/goimagehash/etcs"
 	"github.com/corona10/goimagehash/transforms"
@@ -71,19 +72,33 @@ func PerceptionHash(img image.Image) (*ImageHash, error) {
 
 	phash := NewImageHash(0, PHash)
 	resized := resize.Resize(64, 64, img, resize.Bilinear)
-	pixels := transforms.Rgb2Gray(resized)
-	dct := transforms.DCT2D(pixels, 64, 64)
-	flattens := transforms.FlattenPixels(dct, 8, 8)
-	median := etcs.MedianOfPixels(flattens)
+
+	pixels := pixelPool64.Get().(*[]float64)
+
+	transforms.Rgb2GrayFast(resized, pixels)
+	transforms.DCT2DFast64(pixels)
+	flattens := transforms.FlattenPixelsFast64(*pixels, 8, 8)
+
+	pixelPool64.Put(pixels)
+
+	median := etcs.MedianOfPixelsFast64(flattens)
 
 	for idx, p := range flattens {
 		if p > median {
-			phash.leftShiftSet(len(flattens) - idx - 1)
+			phash.leftShiftSet(64 - idx - 1) // leftShiftSet
 		}
 	}
+
 	return phash, nil
 }
 
+var pixelPool64 = sync.Pool{
+	New: func() interface{} {
+		p := make([]float64, 4096)
+		return &p
+	},
+}
+
 // ExtPerceptionHash function returns phash of which the size can be set larger than uint64
 // Some variable name refer to https://github.com/JohannesBuchner/imagehash/blob/master/imagehash/__init__.py
 // Support 64bits phash (width=8, height=8) and 256bits phash (width=16, height=16)
diff --git a/transforms/dct.go b/transforms/dct.go
index b0976a3..729500f 100644
--- a/transforms/dct.go
+++ b/transforms/dct.go
@@ -73,3 +73,27 @@ func DCT2D(input [][]float64, w int, h int) [][]float64 {
 	wg.Wait()
 	return output
 }
+
+// DCT2DFast64 function returns a result of DCT2D by using the seperable property.
+// Fast uses static DCT tables for improved performance.
+func DCT2DFast64(input *[]float64) {
+	if len(*input) != 4096 {
+		panic("incorrect input size")
+	}
+
+	for i := 0; i < 64; i++ { // height
+		DCT1DFast64((*input)[i*64 : (i*64)+64])
+		//forwardTransformFast((*input)[i*64:(i*64)+64], temp[:], 64)
+	}
+
+	for i := 0; i < 64; i++ { // width
+		row := [64]float64{}
+		for j := 0; j < 64; j++ {
+			row[j] = (*input)[i+((j)*64)]
+		}
+		DCT1DFast64(row[:])
+		for j := 0; j < len(row); j++ {
+			(*input)[i+(j*64)] = row[j]
+		}
+	}
+}
diff --git a/transforms/pixels.go b/transforms/pixels.go
index 378e1b5..be82b86 100644
--- a/transforms/pixels.go
+++ b/transforms/pixels.go
@@ -27,6 +27,55 @@ func Rgb2Gray(colorImg image.Image) [][]float64 {
 	return pixels
 }
 
+// Rgb2GrayFast function converts RGB to a gray scale array.
+func Rgb2GrayFast(colorImg image.Image, pixels *[]float64) {
+	bounds := colorImg.Bounds()
+	w, h := bounds.Max.X-bounds.Min.X, bounds.Max.Y-bounds.Min.Y
+	if w != h {
+		return
+	}
+	switch c := colorImg.(type) {
+	case *image.YCbCr:
+		rgb2GrayYCbCR(c, *pixels, w)
+	case *image.RGBA:
+		rgb2GrayRGBA(c, *pixels, w)
+	default:
+		rgb2GrayDefault(c, *pixels, w)
+	}
+}
+
+// pixel2Gray converts a pixel to grayscale value base on luminosity
+func pixel2Gray(r, g, b, a uint32) float64 {
+	return 0.299*float64(r/257) + 0.587*float64(g/257) + 0.114*float64(b/256)
+}
+
+// rgb2GrayDefault uses the image.Image interface
+func rgb2GrayDefault(colorImg image.Image, pixels []float64, s int) {
+	for i := 0; i < s; i++ {
+		for j := 0; j < s; j++ {
+			pixels[j+(i*s)] = pixel2Gray(colorImg.At(j, i).RGBA())
+		}
+	}
+}
+
+// rgb2GrayYCbCR uses *image.YCbCr which is signifiantly faster than the image.Image interface.
+func rgb2GrayYCbCR(colorImg *image.YCbCr, pixels []float64, s int) {
+	for i := 0; i < s; i++ {
+		for j := 0; j < s; j++ {
+			pixels[j+(i*s)] = pixel2Gray(colorImg.YCbCrAt(j, i).RGBA())
+		}
+	}
+}
+
+// rgb2GrayYCbCR uses *image.RGBA which is signifiantly faster than the image.Image interface.
+func rgb2GrayRGBA(colorImg *image.RGBA, pixels []float64, s int) {
+	for i := 0; i < s; i++ {
+		for j := 0; j < s; j++ {
+			pixels[(i*s)+j] = pixel2Gray(colorImg.At(j, i).RGBA())
+		}
+	}
+}
+
 // FlattenPixels function flattens 2d array into 1d array.
 func FlattenPixels(pixels [][]float64, x int, y int) []float64 {
 	flattens := make([]float64, x*y)
@@ -37,3 +86,14 @@ func FlattenPixels(pixels [][]float64, x int, y int) []float64 {
 	}
 	return flattens
 }
+
+// FlattenPixelsFast64 function flattens 2d array into 1d array.
+func FlattenPixelsFast64(pixels []float64, x int, y int) []float64 {
+	flattens := [64]float64{}
+	for i := 0; i < y; i++ {
+		for j := 0; j < x; j++ {
+			flattens[y*i+j] = pixels[(i*64)+j]
+		}
+	}
+	return flattens[:]
+}
diff --git a/transforms/static.go b/transforms/static.go
new file mode 100644
index 0000000..15dc5a8
--- /dev/null
+++ b/transforms/static.go
@@ -0,0 +1,203 @@
+package transforms
+
+import "math"
+
+// DCT1DFast64 function returns result of DCT-II.
+// DCT type II, unscaled. Algorithm by Byeong Gi Lee, 1984.
+// Static implementation by Evan Oberholster, 2022.
+func DCT1DFast64(input []float64) {
+	var temp [64]float64
+	for i := 0; i < 32; i++ {
+		x, y := input[i], input[63-i]
+		temp[i] = x + y
+		temp[i+32] = (x - y) / dct64[i]
+	}
+	forwardTransformStatic32(temp[:32])
+	forwardTransformStatic32(temp[32:])
+	for i := 0; i < 32-1; i++ {
+		input[i*2+0] = temp[i]
+		input[i*2+1] = temp[i+32] + temp[i+32+1]
+	}
+	input[62], input[63] = temp[31], temp[63]
+}
+
+func forwardTransformStatic32(input []float64) {
+	var temp [32]float64
+	for i := 0; i < 16; i++ {
+		x, y := input[i], input[31-i]
+		temp[i] = x + y
+		temp[i+16] = (x - y) / dct32[i]
+	}
+	forwardTransformStatic16(temp[:16])
+	forwardTransformStatic16(temp[16:])
+	for i := 0; i < 16-1; i++ {
+		input[i*2+0] = temp[i]
+		input[i*2+1] = temp[i+16] + temp[i+16+1]
+	}
+
+	input[30], input[31] = temp[15], temp[31]
+}
+
+func forwardTransformStatic16(input []float64) {
+	var temp [16]float64
+	for i := 0; i < 8; i++ {
+		x, y := input[i], input[15-i]
+		temp[i] = x + y
+		temp[i+8] = (x - y) / dct16[i]
+	}
+	forwardTransformStatic8(temp[:8])
+	forwardTransformStatic8(temp[8:])
+	for i := 0; i < 8-1; i++ {
+		input[i*2+0] = temp[i]
+		input[i*2+1] = temp[i+8] + temp[i+8+1]
+	}
+
+	input[14], input[15] = temp[7], temp[15]
+}
+
+func forwardTransformStatic8(input []float64) {
+	var temp [8]float64
+	x0, y0 := input[0], input[7]
+	x1, y1 := input[1], input[6]
+	x2, y2 := input[2], input[5]
+	x3, y3 := input[3], input[4]
+
+	temp[0] = x0 + y0
+	temp[1] = x1 + y1
+	temp[2] = x2 + y2
+	temp[3] = x3 + y3
+	temp[4] = (x0 - y0) / dct8[0]
+	temp[5] = (x1 - y1) / dct8[1]
+	temp[6] = (x2 - y2) / dct8[2]
+	temp[7] = (x3 - y3) / dct8[3]
+
+	forwardTransformStatic4(temp[:4])
+	forwardTransformStatic4(temp[4:])
+
+	input[0] = temp[0]
+	input[1] = temp[4] + temp[5]
+	input[2] = temp[1]
+	input[3] = temp[5] + temp[6]
+	input[4] = temp[2]
+	input[5] = temp[6] + temp[7]
+	input[6] = temp[3]
+	input[7] = temp[7]
+}
+
+func forwardTransformStatic4(input []float64) {
+	var (
+		t0, t1, t2, t3 float64
+	)
+	x0, y0 := input[0], input[3]
+	x1, y1 := input[1], input[2]
+
+	t0 = x0 + y0
+	t1 = x1 + y1
+	t2 = (x0 - y0) / dct4[0]
+	t3 = (x1 - y1) / dct4[1]
+
+	x, y := t0, t1
+	t0 += t1
+	t1 = (x - y) / dct2[0]
+
+	x, y = t2, t3
+	t2 += t3
+	t3 = (x - y) / dct2[0]
+
+	input[0] = t0
+	input[1] = t2 + t3
+	input[2] = t1
+	input[3] = t3
+}
+
+func init() {
+	// dct256
+	for i := 0; i < 128; i++ {
+		dct256[i] = (math.Cos((float64(i)+0.5)*math.Pi/256) * 2)
+	}
+	// dct128
+	for i := 0; i < 64; i++ {
+		dct128[i] = (math.Cos((float64(i)+0.5)*math.Pi/128) * 2)
+	}
+}
+
+// Static DCT Tables
+var (
+	dct256 = [128]float64{}
+	dct128 = [64]float64{}
+	dct64  = [32]float64{
+		(math.Cos((float64(0)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(1)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(2)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(3)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(4)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(5)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(6)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(7)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(8)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(9)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(10)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(11)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(12)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(13)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(14)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(15)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(16)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(17)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(18)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(19)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(20)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(21)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(22)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(23)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(24)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(25)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(26)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(27)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(28)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(29)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(30)+0.5)*math.Pi/64) * 2),
+		(math.Cos((float64(31)+0.5)*math.Pi/64) * 2),
+	}
+	dct32 = [16]float64{
+		(math.Cos((float64(0)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(1)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(2)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(3)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(4)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(5)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(6)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(7)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(8)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(9)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(10)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(11)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(12)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(13)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(14)+0.5)*math.Pi/32) * 2),
+		(math.Cos((float64(15)+0.5)*math.Pi/32) * 2),
+	}
+	dct16 = [8]float64{
+		(math.Cos((float64(0)+0.5)*math.Pi/16) * 2),
+		(math.Cos((float64(1)+0.5)*math.Pi/16) * 2),
+		(math.Cos((float64(2)+0.5)*math.Pi/16) * 2),
+		(math.Cos((float64(3)+0.5)*math.Pi/16) * 2),
+		(math.Cos((float64(4)+0.5)*math.Pi/16) * 2),
+		(math.Cos((float64(5)+0.5)*math.Pi/16) * 2),
+		(math.Cos((float64(6)+0.5)*math.Pi/16) * 2),
+		(math.Cos((float64(7)+0.5)*math.Pi/16) * 2),
+	}
+	dct8 = [4]float64{
+		(math.Cos((float64(0)+0.5)*math.Pi/8) * 2),
+		(math.Cos((float64(1)+0.5)*math.Pi/8) * 2),
+		(math.Cos((float64(2)+0.5)*math.Pi/8) * 2),
+		(math.Cos((float64(3)+0.5)*math.Pi/8) * 2),
+	}
+	dct4 = [2]float64{
+		(math.Cos((float64(0)+0.5)*math.Pi/4) * 2),
+		(math.Cos((float64(1)+0.5)*math.Pi/4) * 2),
+	}
+	dct2 = [1]float64{
+		(math.Cos((float64(0)+0.5)*math.Pi/2) * 2),
+	}
+)