Home>
I'd like to use R to classify the digits 3 and 4 in the MNIST data set with principal component analysis, without supervision (that is, without telling the method which images are 3s and which are 4s). Below is the code that compresses only the single digit 3 by principal component analysis.
Applicable source code
library (ggplot2)
library (dplyr)
# install.packages ("R.utils")
library (R.utils) # Use unzip ()
library (gclus)
library (MASS)
# install.packages ("recommenderlab")
library ("recommenderlab")
#download data from http://yann.lecun.com/exdb/mnist/
# download.file ("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
# "train-images-idx3-ubyte.gz")
# download.file ("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
# "train-labels-idx1-ubyte.gz")
# download.file ("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
# "t10k-images-idx3-ubyte.gz")
# download.file ("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
# "t10k-labels-idx1-ubyte.gz")
# gunzip the file
# R.utils :: gunzip ("train-images-idx3-ubyte.gz")
# R.utils :: gunzip ("train-labels-idx1-ubyte.gz")
# R.utils :: gunzip ("t10k-images-idx3-ubyte.gz")
# R.utils :: gunzip ("t10k-labels-idx1-ubyte.gz")
# load image files
#
# Read an uncompressed MNIST image file (IDX3, unsigned-byte pixels).
#
# filename: path to the gunzipped "*-images-idx3-ubyte" file.
#
# Returns a data.frame with one row per image and one column per pixel
# (rows * cols columns, pixels in the order they are stored in the file);
# every value is an integer in 0..255.
#
# Stops with an error if the file does not start with the IDX3 image magic
# number (2051) or if it holds fewer pixel bytes than its header announces.
load_image_file <- function(filename) {
  con <- file(filename, "rb")
  # Release the connection even when one of the reads below fails.
  on.exit(close(con), add = TRUE)

  # The header is four big-endian 32-bit integers:
  # magic number, image count, rows per image, columns per image.
  header <- readBin(con, "integer", n = 4L, size = 4L, endian = "big")
  if (length(header) != 4L || header[1L] != 2051L) {
    stop("'", filename, "' is not an IDX3 image file", call. = FALSE)
  }
  n_images <- header[2L]
  n_rows <- header[3L]
  n_cols <- header[4L]

  # Multiply as doubles so a very large file cannot overflow integer range.
  n_pixels <- as.numeric(n_images) * n_rows * n_cols
  pixels <- readBin(con, "integer", n = n_pixels, size = 1L, signed = FALSE)
  if (length(pixels) != n_pixels) {
    # matrix() would otherwise silently recycle the short vector.
    stop("'", filename, "' is truncated: expected ", n_pixels,
         " pixel bytes, got ", length(pixels), call. = FALSE)
  }

  data.frame(matrix(pixels, ncol = n_rows * n_cols, byrow = TRUE))
}
# load label files
#
# Read an uncompressed MNIST label file (IDX1, unsigned-byte labels).
#
# filename: path to the gunzipped "*-labels-idx1-ubyte" file.
#
# Returns an integer vector with one label per item, in file order.
#
# Stops with an error if the file does not start with the IDX1 label magic
# number (2049) or if it holds fewer labels than its header announces.
load_label_file <- function(filename) {
  con <- file(filename, "rb")
  # Release the connection even when one of the reads below fails.
  on.exit(close(con), add = TRUE)

  # The header is two big-endian 32-bit integers: magic number, label count.
  header <- readBin(con, "integer", n = 2L, size = 4L, endian = "big")
  if (length(header) != 2L || header[1L] != 2049L) {
    stop("'", filename, "' is not an IDX1 label file", call. = FALSE)
  }
  n_labels <- header[2L]

  labels <- readBin(con, "integer", n = n_labels, size = 1L, signed = FALSE)
  if (length(labels) != n_labels) {
    stop("'", filename, "' is truncated: expected ", n_labels,
         " labels, got ", length(labels), call. = FALSE)
  }
  labels
}
# load images (files gunzipped into the current working directory)
train <- load_image_file("train-images-idx3-ubyte")
test <- load_image_file("t10k-images-idx3-ubyte")
# load labels and attach them to the image data as a factor column y
train$y <- as.factor(load_label_file("train-labels-idx1-ubyte"))
test$y <- as.factor(load_label_file("t10k-labels-idx1-ubyte"))
# Helper for visualization: draw one digit as a 28 x 28 image.
#
# arr784: the pixel values of one digit (vector, one-row data.frame or
#         one-column matrix). A 785th element, if present, is dropped
#         before plotting.
# col:    colour palette handed to image(); the default is a 12-step
#         gray ramp.
# ...:    further arguments forwarded to image().
show_digit <- function(arr784, col = gray(12:1 / 12), ...) {
  pixel_values <- as.matrix(arr784[-785])
  pixel_grid <- matrix(pixel_values, nrow = 28)
  # image() draws matrix columns from bottom to top, so reverse their order.
  flipped_grid <- pixel_grid[, 28:1]
  image(flipped_grid, col = col, ...)
}
# load image files
# NOTE(review): this repeats the load_image_file definition that appears
# earlier in the file; one of the two could be removed.
#
# Read an uncompressed MNIST image file (IDX3, unsigned-byte pixels).
#
# filename: path to the gunzipped "*-images-idx3-ubyte" file.
#
# Returns a data.frame with one row per image and one column per pixel
# (rows * cols columns, pixels in the order they are stored in the file);
# every value is an integer in 0..255.
#
# Stops with an error if the file does not start with the IDX3 image magic
# number (2051) or if it holds fewer pixel bytes than its header announces.
load_image_file <- function(filename) {
  con <- file(filename, "rb")
  # Release the connection even when one of the reads below fails.
  on.exit(close(con), add = TRUE)

  # The header is four big-endian 32-bit integers:
  # magic number, image count, rows per image, columns per image.
  header <- readBin(con, "integer", n = 4L, size = 4L, endian = "big")
  if (length(header) != 4L || header[1L] != 2051L) {
    stop("'", filename, "' is not an IDX3 image file", call. = FALSE)
  }
  n_images <- header[2L]
  n_rows <- header[3L]
  n_cols <- header[4L]

  # Multiply as doubles so a very large file cannot overflow integer range.
  n_pixels <- as.numeric(n_images) * n_rows * n_cols
  pixels <- readBin(con, "integer", n = n_pixels, size = 1L, signed = FALSE)
  if (length(pixels) != n_pixels) {
    # matrix() would otherwise silently recycle the short vector.
    stop("'", filename, "' is truncated: expected ", n_pixels,
         " pixel bytes, got ", length(pixels), call. = FALSE)
  }

  data.frame(matrix(pixels, ncol = n_rows * n_cols, byrow = TRUE))
}
# load label files
# NOTE(review): this repeats the load_label_file definition that appears
# earlier in the file; one of the two could be removed.
#
# Read an uncompressed MNIST label file (IDX1, unsigned-byte labels).
#
# filename: path to the gunzipped "*-labels-idx1-ubyte" file.
#
# Returns an integer vector with one label per item, in file order.
#
# Stops with an error if the file does not start with the IDX1 label magic
# number (2049) or if it holds fewer labels than its header announces.
load_label_file <- function(filename) {
  con <- file(filename, "rb")
  # Release the connection even when one of the reads below fails.
  on.exit(close(con), add = TRUE)

  # The header is two big-endian 32-bit integers: magic number, label count.
  header <- readBin(con, "integer", n = 2L, size = 4L, endian = "big")
  if (length(header) != 2L || header[1L] != 2049L) {
    stop("'", filename, "' is not an IDX1 label file", call. = FALSE)
  }
  n_labels <- header[2L]

  labels <- readBin(con, "integer", n = n_labels, size = 1L, signed = FALSE)
  if (length(labels) != n_labels) {
    stop("'", filename, "' is truncated: expected ", n_labels,
         " labels, got ", length(labels), call. = FALSE)
  }
  labels
}
# load images
# The original literals read "../ input/mnistdt/..." with a stray space after
# the slash; the space is removed here.
# NOTE(review): presumably the data set directory of the Kaggle notebook --
# confirm the path.
mnist_dir <- "../input/mnistdt"
train <- load_image_file(file.path(mnist_dir, "train-images-idx3-ubyte"))
test <- load_image_file(file.path(mnist_dir, "t10k-images-idx3-ubyte"))
# load labels and attach them to the image data as a factor column y
train$y <- as.factor(
  load_label_file(file.path(mnist_dir, "train-labels-idx1-ubyte"))
)
test$y <- as.factor(
  load_label_file(file.path(mnist_dir, "t10k-labels-idx1-ubyte"))
)
# First 100 images of the digit 3; column 785 is the label y, so drop it
X <- train[train$y == 3, ][1:100, -785]
# Average vector
mu.X <- colMeans(X)
show_digit(255 - mu.X) # Average handwritten 3 digit figure
# Error Z: every image minus the average image (100 x 784 matrix).
# sweep() subtracts mu.X column-wise without a per-row apply() on the
# data.frame.
Z <- sweep(as.matrix(X), 2, mu.X)
show_digit(Z[1, ])
show_digit(Z[10, ])
show_digit(Z[100, ])
cov.Z <- cov(Z)
dim(cov.Z)
# A covariance matrix is symmetric; saying so keeps the eigenvalues real.
pca.Z <- eigen(cov.Z, symmetric = TRUE)
show_digit(255 - X[1, ]) # Full information

# Below, image 1 is compressed to its first k principal-component scores
# (Z1.k) and reconstructed from them (UX.k + mu.X).
# Note: only 100 images are used, so cov.Z has rank at most 99. Components
# past the 99th carry (numerically) zero variance, and the reconstructions
# for k = 100, 150 and 200 all reproduce image 1 essentially exactly.

# k = 50
U.50 <- pca.Z$vectors[, 1:50]
Z1.50 <- as.numeric(t(U.50) %*% Z[1, ])
UX.50 <- U.50 %*% matrix(Z1.50, ncol = 1)
show_digit(255 - (UX.50 + as.matrix(mu.X)))
# k = 100
U.100 <- pca.Z$vectors[, 1:100]
Z1.100 <- as.numeric(t(U.100) %*% Z[1, ])
UX.100 <- U.100 %*% matrix(Z1.100, ncol = 1)
show_digit(255 - (UX.100 + as.matrix(mu.X)))
# k = 150
U.150 <- pca.Z$vectors[, 1:150]
Z1.150 <- as.numeric(t(U.150) %*% Z[1, ])
UX.150 <- U.150 %*% matrix(Z1.150, ncol = 1)
show_digit(255 - (UX.150 + as.matrix(mu.X)))
# k = 200
U.200 <- pca.Z$vectors[, 1:200]
Z1.200 <- as.numeric(t(U.200) %*% Z[1, ])
UX.200 <- U.200 %*% matrix(Z1.200, ncol = 1)
show_digit(255 - (UX.200 + as.matrix(mu.X)))

# Share of the total variance carried by each component ...
plot(1:784, pca.Z$values / sum(pca.Z$values), type = "o", col = 2, lwd = 2,
     xlab = "dimension", ylab = "variance explained", cex = 0.4)
# ... and the cumulative share
plot(1:784, cumsum(pca.Z$values) / sum(pca.Z$values), type = "o", col = 2,
     lwd = 2, xlab = "dimension", ylab = "variance explained", cex = 0.4)

# The first eight principal directions, each drawn as an image
U <- pca.Z$vectors[, 1:8]
for (j in seq_len(ncol(U))) {
  show_digit(255 - U[, j])
}
Starting from the code above, which handles a single digit, I tried the following in order to classify the two digits:
X<-train [train $y == 3,] [1: 100, -785] + train [train $y == 4,] [1: 100, -785]
Or
X<-train [train $y == 3 || 4,] [1: 100, -785]
I tried both, but they only output an image in which 3 and 4 are mixed together; this does not achieve unsupervised classification.
I am using the R editor in a Kaggle notebook.
-
Answer # 1
Related articles
- i want to get the characters in a word file in c # without using visual studio
- python - i want to recognize and classify characters in images! !!
- java - i want to display katakana characters read from a file without duplication
- i want to be able to continue inputting characters without closing the window even if i enter a key
- python - dimensional error when discriminating handwritten characters using learning model of mnist
- python 3x - when i enter characters one by one without enter, the characters disappear
- some functions (library (), etc) cannot be used in r without arguments
- another package will be called without permission
- Angularjs handwritten calendar implementation code (without plugin)
- questions about programming to split characters without using a function to split strings such as strtok
- python - i want to display without newline characters (\ n) and quotation marks (')
Related questions
- python - i can't understand the teacher coercion of seq2seq
- python - [natural language processing] please tell me how to increase features in text classification task
- python 3x - natural language quantification in python
- python - about evaluation of numpy return value of keras predict
- i want to write a partial dependence plot in random forest
- python - distributed expression of multiple words by word2vec (vectorization)
- python - persistence of machine learning model? the need for
- error: unexpected'}'
- i want to know how to cancel the error in dataframe(chr37, cpg37)
I don't have these libraries installed here, so I can't verify this directly.
If your intention is to extract the rows whose label is 3 or 4,
it might be something like `X <- train[train$y %in% c(3, 4), -785]`.