library(splm)
library(spdep)
library(rgdal)
library(spdep)
library(maptools)
library(foreign)

Data Preparation

Open the shapefile of Counties that we need to create a stacked weight for in R.

Counties<- readOGR(".","USCounties")

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "USCounties"
## with 3098 features
## It has 63 fields

# Plot polygons for inspection
plot(Counties)

First, check how many counties are in the datases. This is good practice to ensure you are getting all the data you were expecting.

length(Counties)

## [1] 3098

Next, we’ll need to work with the unique identifier for each county. If the data structure for the unique FIPS code is a factor, we’ll need to convert to numeric value. Once converted, we look at the first few records of our dataset to ensure we have all the data required before continuing. Because this is a spatial format, we look at the data attached only by using @data.

class(Counties$FIPS)

## [1] "factor"

Counties$RID <- as.numeric(Counties$FIPS)
head(Counties@data)

##        AREA PERIMETER CO99_D00_ CO99_D00_I STATE COUNTY              NAME
## 0 0.5623933  4.254051       115        114    27    077 Lake of the Woods
## 1 0.6839444  5.326881       116        115    53    073           Whatcom
## 2 1.6499787  8.658548       118        117    30    029          Flathead
## 3 1.1592117  5.614212       119        118    30    053           Lincoln
## 4 0.4048582  2.966526       120        119    16    021          Boundary
## 5 1.3342615  5.517993       121        120    30    005            Blaine
##   LSAD LSAD_TRANS  FIPS OID_ FIPS_1 PR_END0105 PS_END0609  PR_ANNUAL
## 0   06     County 27077 1315  27077  0.3895349  0.1918159 0.07790698
## 1   06     County 53073 2945  53073  0.4103035  0.3763109 0.08206070
## 2   06     County 30029 1575  30029  0.4277601  0.3227757 0.08555203
## 3   06     County 30053 1587  30053  0.3678284  0.3022063 0.07356568
## 4   06     County 16021  525  16021  0.2394958  0.2063291 0.04789916
## 5   06     County 30005 1563  30005  0.3296178  0.3384615 0.06592357
##    PS_ANNUAL PR_AGE1   PS_AGE1 PR_AGE2   PS_AGE2 PR_AGE3    PS_AGE3 PR_FEM
## 0 0.04795396  0.4640 0.4595118  0.3020 0.2715360  0.1235 0.14200648 0.5155
## 1 0.09407773  0.4400 0.4438256  0.3060 0.2765459  0.1045 0.11622325 0.5435
## 2 0.08069392  0.4530 0.4694915  0.2830 0.2602371  0.0960 0.10413963 0.5275
## 3 0.07555156  0.4680 0.4886819  0.2320 0.2271315  0.0670 0.07359613 0.4675
## 4 0.05158228  0.4705 0.4727929  0.2585 0.2499296  0.0845 0.08360125 0.4780
## 5 0.08461538  0.4455 0.4067981  0.3195 0.3161429  0.1075 0.13512608 0.5295
##      PS_FEM PR_BLACK     PS_BLACK PR_ASIAN     PS_ASIAN PR_HISP
## 0 0.5066785   0.0005 0.0020839751    1e-03 0.0010419875  0.0000
## 1 0.5315883   0.0055 0.0065378831    7e-03 0.0122226685  0.0045
## 2 0.5165479   0.0005 0.0008925455    2e-03 0.0018939789  0.0020
## 3 0.4709373   0.0010 0.0011291083    5e-04 0.0006805764  0.0010
## 4 0.4752317   0.0020 0.0019882538    1e-03 0.0007201697  0.0030
## 5 0.5410103   0.0020 0.0024332937    0e+00 0.0000000000  0.0000
##    PS_HISPANI PR_AIAN     PS_AIAN PR_DUAL   PS_DUAL PR_POPDEN0 PS_POPDEN5
## 0 0.000000000  0.0040 0.005966745  0.0750 0.1807441   3.487303   3.296049
## 1 0.005520302  0.0150 0.017084961  0.0755 0.2264800  78.703168  87.368283
## 2 0.002041946  0.0085 0.011113559  0.0580 0.2040641  14.606920  16.187632
## 3 0.001361856  0.0080 0.009497411  0.0775 0.2655374   5.214151   5.172630
## 4 0.003792455  0.0080 0.007546442  0.0860 0.2596107   7.779754   8.177766
## 5 0.000000000  0.2630 0.293153795  0.1135 0.2412683   1.658472   1.564770
##   PR_NENGLAN PS_NENGLAN PR_EGRADS  PS_EGRADS PR_XMCARE PS_XMCARE PR_AVGPOV
## 0      0.065 0.06451613     0.045 0.04486423 0.1328685  0.140000  0.100333
## 1      0.162 0.16174402     0.082 0.12749533 0.1252266  0.135023  0.119000
## 2      0.079 0.07926829     0.061 0.08147579 0.1540154  0.162605  0.133333
## 3      0.000 0.00000000     0.028 0.03881279 0.1540154  0.162605  0.167667
## 4      0.067 0.06666667     0.066 0.05530254 0.1290082  0.137768  0.168667
## 5      0.033 0.03333333     0.029 0.04205607 0.1540154  0.162605  0.259000
##   PS_AVGPOV   PR_COMP01   PS_COMP06 PR_MCPE00   PS_MCPE05 PR_RCOD00
## 0  0.090000 0.011978945 0.000000000 0.0000000 0.050429185         0
## 1  0.124714 0.033712827 0.049683349 0.1396425 0.139701402         0
## 2  0.125286 0.022902898 0.020786949 0.0000000 0.007323325         0
## 3  0.183143 0.023203019 0.005174984 0.0000000 0.006114770         0
## 4  0.148571 0.004492105 0.000000000 0.1022032 0.090957447         0
## 5  0.238143 0.000000000 0.000000000 0.0000000 0.000000000         0
##   PS_RCOD06  PR_MOVEDE  PS_MOVEDL PR_XPOV00 PS_XPOV05 PR_XUNE00 PS_XUNE05
## 0         0 0.07951919 0.03215848       8.9      10.0       4.0       4.6
## 1         0 0.09787715 0.08880584      11.4      13.9       5.1       5.0
## 2         0 0.15837343 0.17086026      11.6      12.5       5.2       4.2
## 3         0 0.15655041 0.14110025      17.0      19.1       8.4       7.5
## 4         0 0.16189956 0.15215059      15.4      14.0       7.6       7.4
## 5         0 0.03997511 0.05570375      22.6      26.2       5.4       3.8
##   PR_XUNI00 PS_XUNI05 PR_DEND01 PS_DEND06 FIPS2 POLY_ID Regime  RID
## 0      13.1      18.5 25.946733  44.90969 27077       1      1 1316
## 1      14.4      17.0  7.829853  12.85676 53073       2      1 2946
## 2      14.7      19.0 10.787930  12.93828 30029       4      1 1576
## 3      18.0      20.6 26.402960  26.40296 30053       5      1 1588
## 4      20.2      17.9 15.988973  15.98897 16021       6      1  526
## 5      24.3      22.6 42.066288  36.30957 30005       7      0 1564

Here we create an index that is the length of the total number of counties we have. (Or more precisely, that is the length of all unique identifiers, which should be the same.) Check the length of the index to confirm it matches the total number of counties.

index<- seq.int(length(Counties$RID))
length(index)

## [1] 3098

Create big W matrix for panel analysis

First, generate a weight. Here we use the nb package to create a first-order queen contiguity weight for each county.

W_q1 <- poly2nb(Counties,queen=TRUE)

Check the length of the weight. This should equal the length of one time period, or the total rows of Counties we start with.

length(W_q1)

## [1] 3098

Convert the weight to a weight-list object.

wr<- nb2listw(W_q1)

Convert to a sparse matrix.

w1<- listw2dgCMatrix(wr)

Create time periods, and validate. Because we have two time periods here, the length should be twice the size of our original dataset.

time_id<- c("PR","PS")
tperiods <- length(time_id)
tperiods

## [1] 2

Calculate the Kronecker product using a sparse matrix with diagonal = tperiods.

I.T <- Diagonal(tperiods)
I.T

## 2 x 2 diagonal matrix of class "ddiMatrix"
##      [,1] [,2]
## [1,]    1    .
## [2,]    .    1

bigWmat <- kronecker(I.T,w1)
str(bigWmat)

## Formal class 'dgTMatrix' [package "Matrix"] with 6 slots
##   ..@ i       : int [1:36244] 16 30 42 7 31 3 26 43 49 60 ...
##   ..@ j       : int [1:36244] 0 0 0 1 1 2 2 2 2 2 ...
##   ..@ Dim     : int [1:2] 6196 6196
##   ..@ Dimnames:List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##   ..@ x       : num [1:36244] 0.25 0.25 0.111 0.143 0.25 ...
##   ..@ factors : list()

Inspect the big Weights matrix produced. It should be 3098 counties x 2 time periods = 6196 wide and long.

head(bigWmat)

## 6 x 6196 sparse Matrix of class "dgTMatrix"
##                                                                   
## [1,] . . .    .         .    . . .   . . .    . . .         .    .
## [2,] . . .    .         .    . . 0.5 . . .    . . .         .    .
## [3,] . . .    0.1111111 .    . . .   . . .    . . .         .    .
## [4,] . . 0.25 .         0.25 . . .   . . .    . . .         .    .
## [5,] . . .    0.3333333 .    . . .   . . .    . . 0.3333333 .    .
## [6,] . . .    .         .    . . .   . . 0.25 . . .         0.25 .
##                                                                          
## [1,] 0.3333333 . . . . . . . . . .         . .         . 0.3333333 ......
## [2,] .         . . . . . . . . . .         . .         . .         ......
## [3,] .         . . . . . . . . . 0.1111111 . .         . .         ......
## [4,] .         . . . . . . . . . .         . 0.2500000 . .         ......
## [5,] .         . . . . . . . . . .         . 0.3333333 . .         ......
## [6,] .         . . . . . . . . . .         . .         . .         ......
## 
##  .....suppressing columns in show(); maybe adjust 'options(max.print= *, width = *)'
##  ..............................

Convert back to list weights object.

bigW <- mat2listw(bigWmat,style="W")
summary(bigW)

## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 6196 
## Number of nonzero links: 36244 
## Percentage nonzero weights: 0.09440898 
## Average number of links: 5.84958 
## Link number distribution:
## 
##    1    2    3    4    5    6    7    8    9   10   11   13   14 
##   42   82  180  564 1318 2136 1322  430   96   18    4    2    2 
## 42 least connected regions:
## 45 49 51 99 282 827 855 938 1388 1484 1565 1595 1631 1655 1662 1723 1730 1786 1932 1968 1969 3143 3147 3149 3197 3380 3925 3953 4036 4486 4582 4663 4693 4729 4753 4760 4821 4828 4884 5030 5066 5067 with 1 link
## 2 most connected regions:
## 1563 4661 with 14 links
## 
## Weights style: W 
## Weights constants summary:
##      n       nn   S0       S1       S2
## W 6196 38390416 6196 2227.779 25236.31

class(bigW)

## [1] "listw" "nb"

The stacked weight is ready for writing. Write to a .gal file for use in GeodaSpace.

write.nb.gal(bigW$n, "Counties_BigW.gal", oldstyle=FALSE)

Stacked Spatial Weight Tutorial

Marynia Kolak

November 8, 2017

Data Preparation

Create big W matrix for panel analysis