I have a data frame called Cluster_Dummy which has nine independent variables that are continuous (measurements of acoustic parameters from a spectrogram) and are grouped by the dependent variable, which is called Country (Categorical but I gave them dummy values of 1, 2, and 3 for the LDA lm model) which has 3 levels/cluster groups. I have produced an lm model (called LDA), produced the cutoff for predictions, predicted the LDA model with my actual data, and then produced an output table showing group vs predicted values.
I want to predict the power of the prediction model using the function confusionMatrix(out) in the caret package, although, I keep on getting this error message (see below) when using every combination of code that I keep on trying.
Would anyone be able to lend a hand? I'm feeling baffled.
Many thanks in advance if you can provide clarity
Error Message
Error in !all.equal(nrow(data), ncol(data)) : invalid argument type
R-CODE
library(MASS)
library(dplyr)
library(caret)
library(e1071)
###################
Shuffled data frame
###################
#Produce a new version of the data frame 'Clusters_Dummy' with the rows shuffled
#This will be used to predict against the linear model LDA (below)
NewClusters=Cluster_Dummy[sample(1:nrow(Cluster_Dummy)), ]
#Produce a dataframe
NewCluster<-as.data.frame(NewClusters)
#display
print(NewCluster)
#Check the structure of the data
str(NewCluster)
#Number of rows
nrow(NewCluster)
#############
# Cutoff #
#############
#As a starting point we will need the group means to determine, what
#is known as the cluster head (mean of each group).
#In our case, group is our dependent variable = Country
MechMean.Low.Freq<-tapply(Cluster_Dummy$Low.Freq, Cluster_Dummy$Country, mean)
MechMean.High.Freq<-tapply(Cluster_Dummy$High.Freq, Cluster_Dummy$Country, mean)
MechMean.Peak.Freq<-tapply(Cluster_Dummy$Peak.Freq, Cluster_Dummy$Country, mean)
MechMean.Delta.Freq <-tapply(Cluster_Dummy$Delta.Freq, Cluster_Dummy$Country, mean)
MechMean.Delta.Time<-tapply(Cluster_Dummy$Delta.Time, Cluster_Dummy$Country, mean)
MechMean.Peak.Time<-tapply(Cluster_Dummy$Peak.Time, Cluster_Dummy$Country, mean)
MechMean.Center.Freq<-tapply(Cluster_Dummy$Center.Freq, Cluster_Dummy$Country, mean)
MechMean.Start.Freq<-tapply(Cluster_Dummy$Start.Freq, Cluster_Dummy$Country, mean)
MechMean.End.Freq<-tapply(Cluster_Dummy$End.Freq, Cluster_Dummy$Country, mean)
#Store all the mechanical means in one object
#Create a data frame to store the means we found for each Group
Cluster_Rbind<-rbind(MechMean.Low.Freq,
MechMean.High.Freq,
MechMean.Peak.Freq,
MechMean.Delta.Freq,
MechMean.Delta.Time,
MechMean.Peak.Time,
MechMean.Center.Freq,
MechMean.Start.Freq,
MechMean.End.Freq)
Cluster_Rbind
#Transpose the 'Cluster_Rbind' object
Clusterheads <- as.data.frame(t(Cluster_Rbind))
Clusterheads
#Check the structure of the dataframe
str(Clusterheads)
#Create a data frame to store the means we found for each Group
Clusterheads<- Clusterheads %>% dplyr::rename(Low.Freq = MechMean.High.Freq,
High.Freq = MechMean.High.Freq,
Peak.Freq = MechMean.Peak.Freq,
Delta.Freq = MechMean.Delta.Freq,
Delta.Time = MechMean.Delta.Time,
Peak.Time = MechMean.Peak.Time,
Center.Freq = MechMean.Center.Freq,
Start.Freq = MechMean.Start.Freq,
End.Freq = MechMean.End.Freq)
Clusterheads
#Check the structure of the dataframe
str(Clusterheads)
#In order to create a cut-off we need to first determine the predicted values
#for the Cluster heads. We can do this using the predict() functions.
#Where we define the new data to be our previously determined cluster heads.
#generates predicted values for the cluster heads
predM <- predict(LDA, newdata = Clusterheads)
predM
# 1 2 3
#1.577793 2.089622 2.327849
#Cutoff = the average or mean of the values predicted by our model for the cluster heads.
cutoff <- mean(predM)
cutoff
#[1] 1.998421
#Model format for lda and predictions
LDA<-lm(Country~Low.Freq+High.Freq+Peak.Freq+Delta.Freq+Delta.Time+Peak.Time+Center.Freq+Start.Freq+End.Freq, data=NewCluster)
LDA
#Thus, now we can use the predict()
#function to determine the predicted value for all observations
#in our original data set. Technically, these predicted values
#are known as the discriminant scores.
pred <- predict(LDA, NewCluster[2:10])
pred
# stores these predicted values in a new column within the Employee data frame
# named score.
NewCluster$Scores <- pred
NewCluster$Scores
#Check the header in the dataframe NewCluster in the first five rows
head(NewCluster, 5)
#We will need to convert the Scores into a 1 or two depending on the cut-off.
#Thus, to do so we can use a simple ifelse if score is less than cut-off set
#as a 1 and else a 2. Then store this column in our data frame under the name of Predicted.
# codes our scores into 1 or 2 based on the cutoff and stores it as a Predicted
# column within our NewCluster data frame
NewCluster$Predicted <- ifelse(NewCluster$Score <= cutoff, 1, 2)
head(NewCluster, 5) # shows first three columns
#We can see that for the first three observations our model correctly predicts the group.
#However, the cliche comes to light, “all models are wrong, just some are useful”.
#We need to determine the accuracy of our model. To do so we need to count
#how many times our model was correct and how many times is made an error in predicting the group.
#To accomplish this we can use our friend the table() function.
#Where we can tally both the Predicted column and the Country column.
out <- table(NewCluster$Country, NewCluster$Predicted) #two dimensional table of Group versus Predicted
out #prints table output to the console
#Obtain a more thorough summary of the predicting power of our model
confusionMatrix(out)
Dummy Data
structure(list(ID = 1:99, Low.Freq = c(-0.304216433098245, 0.365249308493684,
-0.304219520657338, -0.304200404824886, -0.304159263455675, -0.304014511002047,
4.35383706315091, 3.02969095427817, -0.301782981224345, -0.287938885585248,
-0.304191675711965, -0.30421660383884, 1.60142261344585, -0.280652929087063,
-0.304181232078903, -0.240266909154505, -0.304206053492905, -0.300413570510289,
-0.236315559162406, -0.279763683611381, -0.300837540750323, -0.250501947009389,
-0.303872753623024, -0.287729934667888, -0.26775078085398, -0.304202133573411,
-0.304219470857998, -0.300253721742384, 2.89901448335972, -0.154774219089617,
-0.304219463743806, -0.252755274669724, -0.304219108034234, -0.304219463743806,
-0.304215223685697, -0.304154084324293, -0.304198028684939, -0.0892417804319289,
-0.303889265661401, -0.304219257432254, -0.304218901722681, 2.51581924590476,
-0.0983752639954022, -0.304164584870887, 0.376523544751835, -0.299386473346695,
-0.299123262491073, -0.303676793219269, 2.06357378708432, -0.304219485086381,
-0.303572314203494, 0.286995229982817, -0.304151736641111, -0.302589716307479,
-0.303914969235145, -0.304219492200572, -0.174916949507755, -0.293948406737563,
-0.304178044921129, -0.245145693602042, -0.304218915951064, -0.304213992930574,
-0.304049164228647, -0.304219470857998, -0.234288875413591, -0.289451618800448,
-0.251330295006178, 0.280389062935361, -0.304219513543147, 0.364778199621038,
-0.235648304917028, -0.304219492200572, -0.304219506428955, -0.303750190332558,
-0.30421952777153, 6.36020803814356, -0.269466161812907, -0.304214227698892,
-0.304219470857998, -0.303817888978487, -0.303329257852277, 0.215429394946275,
-0.300545218623249, -0.304219477972189, -0.272627260303485, -0.304215173886356,
-0.304191312888201, -0.304219470857998, -0.304153685929571, -0.303621032186609,
-0.303535014497671, -0.304179048022125, -0.304219129376808, -0.304194194135742,
-0.303659370564385, -0.291390058118398, -0.298461521744077, -0.304168796472231,
-0.30421952777153), High.Freq = c(-0.268066145167784, -0.268091503863823,
-0.268119902767041, -0.203361841212907, -0.206792382631088, -0.262213276577144,
-0.268119264589441, 4.37533260032138, -0.226454200184529, -0.231124889088292,
-0.268119690041174, -0.268112767586926, -0.265097351834236, -0.268113042357837,
-0.267723319706273, -0.26811914936293, 1.5189608633156, -0.268119300043752,
-0.26811994708493, -0.267540082962754, -0.268114682119727, 0.521564428144686,
-0.259730402304845, -0.206598580502851, -0.262311210248074, -0.268119920494197,
-0.268119902767041, -0.268114478257438, -0.250333441001039, -0.268112076227858,
-0.268107564666767, -0.267864959679254, 0.534207931855464, -0.268111234187969,
-0.268119955948508, 2.35735620911043, 4.04922589399634, -0.268119911630619,
-0.26811203190997, -0.268115267115861, -0.268119911630619, -0.267241522208694,
-0.268119938221352, -0.25991213224014, -0.268039492389389, -0.26811994708493,
-0.268119769813375, 0.517202049930545, -0.263016573768087, -0.268119530496774,
-0.268045227124215, -0.268118236414418, 0.28430249395547, -0.268119911630619,
-0.268119237998708, 0.0920491075283562, 6.88391443021725, -0.207686549221468,
-0.268088924562688, -0.267306456779536, 0.0519087291830332, -0.216406767497924,
0.249414042490642, -0.268119654586863, -0.268090883413378, -0.210439283982964,
-0.26811978754053, -0.268116392790239, -0.268114779619082, -0.268119282316596,
-0.231968223060083, -0.268118192096529, -0.26805896566978, -0.2673022820344,
-0.267395668689927, -0.268119601405397, -0.268117057558573, -0.26760796024142,
0.597575918505807, -0.244396740691722, -0.268119282316596, -0.268078350314392,
-0.260877349270033, -0.268119503906041, -0.267085470058237, -0.268114052805704,
-0.268119929357775, -0.267740231412684, -0.268033766518141, -0.268119769813375,
-0.268119920494197, -0.268116773924084, -0.26809687519196, -0.268119760949797,
-0.267849705461888, -0.268069965369809, -0.268119654586863, 0.192109133041653,
-0.26780490893977), Peak.Freq = c(0.101401275253481, -0.350550343192447,
4.81764839072122, -0.128530375542999, -0.350427022914335, -0.0246746068894746,
-0.350583487660151, -0.350597503925647, -0.000425387009183284,
4.8405272531038, -0.350581794071363, 0.0926208226183403, 3.30172336242359,
-0.348876693035011, -0.306940395519684, -0.330159939391773, -0.0878370527122053,
1.57214860697077, -0.350579892680269, -0.350531874762559, -0.350597420804725,
-0.350595384342133, -0.350557590297846, -0.350596516864697, 0.00572598202896427,
-0.35023674353838, -0.350597238977708, -0.347998556857988, -0.350593971286456,
-0.350597623411973, 0.182722167070597, -0.034234603897863, -0.32731738048649,
0.138386781549843, -0.0380389653833623, -0.350595306416268, -0.350594615473603,
-0.35059758704657, -0.350596735057118, -0.301356165728367, -0.350597524705878,
-0.350597353268976, -0.334637751783522, -0.350586407282541, -0.208762720958806,
-0.35059758704657, -0.350575482076337, -0.350594989517753, -0.346896622793157,
-0.35033435867132, 2.36347546444285, -0.350011397523396, -0.349592982386508,
-0.348216894740223, 0.135549137169081, -0.235822539749817, -0.350597597436685,
-0.350597581851512, -0.341815586284254, -0.350597628607031, -0.3505976078268,
-0.347206050815944, -0.350365061461936, -0.350486412813205, -0.35056965841673,
-0.35059604930951, -0.350593882970477, -0.347214882413922, -0.350585025397211,
-0.350594584303257, 2.23804439285341, -0.350592719277567, -0.350597597436685,
-0.350593389440001, -0.350357580578943, -0.35039432002653, -0.316201006551691,
-0.350250053276038, -0.210824161388379, -0.350576339260847, -0.350597628607031,
-0.350597555876224, -0.350597171441959, -0.350594875226485, 0.157525794672617,
-0.350597161051844, -0.350597597436685, 1.0042362264688, -0.350597581851512,
3.9779541232322, 0.166140836672696, -0.350597602631742, -0.306670091475937,
-0.350316627939616, -0.350597618216915, -0.350596023334222, 1.20356177706191,
-0.350571902681628, -0.348234791713772), Delta.Freq = c(-0.276403626940201,
-0.27640302987526, -0.27567939534588, -0.276397173731456, -0.276403626940201,
-0.244840017851571, -0.276397271879118, -0.276403373392075, -0.204782039459728,
1.71247148857326, -0.269714994671489, -0.20449100710642, -0.276342889895667,
4.85117158453868, -0.276328372220734, -0.275620212305987, -0.276338923094347,
-0.20979859545098, -0.276403618761229, -0.276403635119173, -0.276351625037542,
-0.274922251569065, -0.274280259536122, -0.276348451596486, -0.276403397928991,
-0.2760931940658, -0.27640366783506, -0.276403651477116, 4.62756246252119,
-0.271777764073554, -0.276403512434596, -0.276403275244414, 1.92742374965492,
0.301658094584923, -0.212357689398355, -0.276403635119173, 0.430949088723176,
-0.276403496076652, 0.46706318327441, -0.276403651477116, -0.27581187015201,
-0.271706647913814, -0.269291691986345, -0.275803184083966, -0.223500736926992,
-0.27638519153778, -0.227832802947865, -0.276147428827759, -0.276393452299291,
-0.276400772479045, -0.276331553840762, -0.276403643298145, -0.276382664235496,
-0.275726375359859, -0.0323736652776823, -0.276360891812583,
-0.276403365213104, -0.000346897374361742, -0.275755018119078,
-0.276389469140027, -0.206701848613824, -0.276403340676188, -0.257680897239843,
-0.276396617561374, -0.269848647249557, -0.27633802340745, -0.276403626940201,
-0.276403545150483, -0.240467138761451, -0.273083708678703, -0.275822159298526,
-0.268631289087897, -0.276335626968714, -0.26497786507257, -0.276396862930528,
-0.276403095307034, -0.276389387350309, -0.275897332228275, -0.205168528592831,
2.18342649099756, -0.276028563830692, 0.471294091772149, -0.275750233420579,
-0.27601299924737, -0.27635887160655, -0.276403520613568, -0.276369430659135,
-0.276380275975732, 5.8908566987373, -0.276400600720638, 0.163700449281611,
-0.276403078949091, -0.276402555494896, -0.276399504738417, -0.274503488213267,
-0.169029275524362, -0.276389829014786, -0.276333484078105, -0.276403479718709
), Delta.Time = c(-0.27972630760955, -0.286566314347326, -0.286603856751927,
-0.286581084632265, -0.268942038320055, -0.286605302440323, 0.0396904405264516,
-0.286606025284521, -0.286602350405277, -0.286606353850066, -0.286239457343641,
-0.286166728096649, -0.286580280910395, -0.283396142108779, -0.286606363959775,
-0.286605919132576, 4.36288044673204, -0.286194696606769, -0.270757251746708,
-0.264309557323939, -0.286577010419513, -0.286606338685502, -0.0510313640296911,
-0.286604245975726, -0.281128807328308, -0.286606136491321, -0.207245719021333,
-0.286606343740357, 0.169177899206709, 1.97018336315715, -0.27481809946333,
-0.282756298622242, -0.0378143145709261, -0.286604453224762,
-0.28660614660103, -0.284113340034203, -0.28660614660103, 3.75018606828656,
-0.0574077799487183, -0.254933459299987, -0.286606348795211,
-0.269188265339116, -0.286604604870398, -0.277703632843235, -0.286605297385469,
-0.286601531518843, 4.30823812638282, -0.286606348795211, -0.286586892660121,
-0.24062436917805, -0.285834684812215, -0.246318925659679, -0.122996276578071,
-0.286606222423848, -0.260312956808407, -0.286605802870922, -0.247146107109989,
-0.286606025284521, -0.286605171014105, -0.286580776286139, -0.286606369014629,
-0.286606374069484, -0.213448323767311, 0.330299436209101, -0.286606338685502,
-0.284101172999347, -0.286606232533557, -0.241287192033284, -0.286591846417562,
-0.286601531518843, -0.286606181985012, -0.286606333630648, -0.286602891274712,
4.43274059873138, -0.28617523036197, -0.283674093394876, -0.28612144670976,
-0.281642218793315, 0.0431928329693407, -0.285911296187487, -0.286606232533557,
-0.286567856077958, -0.286568331234283, -0.286606015174812, -0.286606369014629,
-0.286604786845161, -0.286606348795211, -0.286594181760355, -0.243007500566117,
-0.0273019004129648, -0.286602077443133, -0.282105233358647,
-0.285793882026441, -0.286606348795211, 4.01427987002044, -0.286606369014629,
-0.286606343740357, 0.102461290472251, -0.285643247361416), Peak.Time = c(-0.333882961963073,
-0.339190085829301, 2.51587312397892, -0.298439910422876, -0.315138642768508,
-0.337891910628967, -0.34156427399663, -0.341545202612103, -0.341458694644134,
0.0343701972802003, -0.3415606830405, -0.341559440622102, 3.6979716686817,
-0.341197524666907, 3.52439427495817, -0.340922609453223, 0.704279655136718,
-0.341532259443219, -0.341559524498449, 0.225011342729421, -0.341523300400845,
-0.341564258269815, -0.341564247785271, -0.341563917522153, -0.34156049956099,
-0.337442003142906, -0.341105653854966, -0.341556085568199, -0.341559687008873,
-0.0920345828925796, 0.0741254419363656, -0.3415640013985, -0.341525601758131,
-0.341561181056314, -0.306923688442663, -0.341559278111678, -0.341531279138407,
0.0543024309739546, -0.311462892893313, -0.207114289498659, -0.341561925458898,
1.38086893619461, -0.341564258269815, -0.341564268754358, -0.341550109378435,
-0.229549812785459, -0.0090520007192101, -0.341560567710522,
-0.341560179782415, -0.336794755580114, -0.341561773433018, -0.341564017125316,
-0.341559445864373, -0.338524380227773, -0.341523127405878, -0.341561479865802,
-0.330254119935253, -0.341563775980816, -0.341564247785271, -0.341560525772348,
-0.341564253027543, 3.08316117591663, 2.00620932329784, -0.341564268754358,
-0.173956989001828, -0.341560897973641, -0.341564242543, -0.338366970534754,
-0.320308466296045, -0.339175737731596, -0.315772858043798, -0.341564232058456,
-0.341183097935126, -0.34151187224849, 0.099262360264613, -0.341093827289959,
-0.341564253027543, -0.34153792109668, -0.341542649625774, -0.341443229942552,
-0.341563854614892, 0.914667609886587, -0.341545532875221, 4.3691294791881,
-0.341528516461209, -0.341544369090899, -0.340505345593179, -0.341461404898615,
-0.234449344782971, -0.242254196191946, 4.53752409573067, -0.333741226662506,
-0.339990958164928, -0.340811316024549, -0.314552200315428, -0.339376448589044,
-0.341560017271991, -0.341271802414946, -0.341563870341707),
Center_Freq = c(-0.366017554449386, -0.321907548370746, 1.16773931992075,
-0.366099397015396, 0.0999682470982382, -0.365925816303566,
-0.366089690165716, -0.366305393402328, -0.366333238747784,
-0.366311978553635, -0.366318742233443, 3.47987752379084,
-0.366331417757067, -0.0918624745118981, -0.36633296330381,
-0.366332636851693, -0.342355509248839, -0.345043398665276,
-0.366333177538012, -0.366329193802017, -0.364593881462699,
-0.366329351927261, -0.366105074221751, -0.366332866388338,
0.662096461999439, -0.366332953102182, -0.366333325461628,
-0.365999625086998, -0.366330295577913, 0.0881144697652147,
-0.36150530777778, -0.330928991426984, -0.366084696468482,
-0.354510433559405, -0.340185551419195, -0.366320859071393,
-0.366333289755928, -0.363489881611221, 1.93035801604625,
-0.259182324407947, -0.366333284655113, -0.329433901938789,
-0.366333029614397, -0.366333284655113, 2.95708977301997,
-0.366100968066212, -0.366136984916233, 0.0981224307171609,
-0.366333136731498, -0.360638403978236, -0.354624503070386,
-0.366318023018622, -0.366333284655113, -0.366316650899566,
-0.366332611347621, -0.366059702478239, -0.362926073300296,
-0.363448320176017, -0.366332902094038, -0.366328377671723,
2.46716034601805, -0.366025083251345, -0.366299680490272,
4.00477836341233, 2.73402086869333, -0.366310856374481, -0.36633292759811,
-0.341415464872541, -0.366329530455763, -0.366333289755928,
-0.366045358988328, -0.365695014656521, 0.00625049563732535,
-0.363698882377797, -0.366333294856742, -0.343681945411851,
3.47987752379084, -0.366331417757067, -0.0918624745118981,
-0.36633296330381, -0.366332636851693, -0.366330734247946,
-0.36514828387199, -0.29936930948374, -0.366324699984587,
-0.366289560474631, -0.366333213243713, -0.336370060683351,
-0.366310121857217, 2.92199877690913, -0.347397215347636,
-0.366303092935063, -0.366288275069419, -0.366333294856742,
-0.355182833106715, -0.0519843182400366, -0.366143085490178,
3.28803282594943, -0.366224239446253), Country = c("Holland",
"Holland", "Holland", "Holland", "Holland", "Holland", "Holland",
"Holland", "Holland", "Holland", "Holland", "Holland", "Holland",
"Holland", "Holland", "Holland", "Holland", "Holland", "Holland",
"Holland", "Holland", "Holland", "Holland", "Holland", "France",
"France", "France", "France", "France", "France", "France",
"France", "France", "France", "France", "France", "France",
"France", "France", "France", "France", "France", "France",
"France", "France", "France", "France", "Holland", "Holland",
"Holland", "Holland", "Holland", "Holland", "Holland", "Holland",
"Holland", "Holland", "Holland", "Holland", "Holland", "Holland",
"Holland", "Holland", "Holland", "Holland", "Holland", "Holland",
"Holland", "Holland", "Holland", "Holland", "France", "France",
"France", "France", "France", "France", "France", "France",
"France", "France", "France", "France", "France", "France",
"France", "France", "France", "France", "France", "France",
"France", "France", "France", "France", "France", "France",
"France", "France")), class = "data.frame", row.names = c(NA,
-99L))