Parse IATA SSIM Schedules (Chapter 7) into a Data Frame.
Note: only SSIM record types 2 and 3 are currently parsed.
1AIRLINE STANDARD SCHEDULE DATA SET 1 000000001
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
2LAF W20 01NOV2001DEC2028NOV20SSIM EXAMPLE SCHEDULE 28NOV20CKENNY TEST AIRLINE 1/8/13/18 ET1800000002
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
3XAF 43310101J01NOV2001DEC2012345672CDG18451845+0100T1ALC20252020+01001F320XX XX XX XX XX XXX XX XX XX XX 1234 2L W 00000003
4XAF 43310101J XX020CDGALCAF TEST 000004
4XAF 43310101J XX021CDGALCAF TEST 000005
4XAF 43310101J XX026CDGALCAF TEST 000006
3XAF 43310101J01NOV2022NOV20 672CDG07000700+0100T1ALC08300830+01001F320XX XX XX XX XX XXX XX XX XX XX 1234 2L W 00000007
3XAF 12340101J01NOV2016NOV20 672CDG18451945+0100T1ALC21252120+01001F320XX XX XX XX XX XXX XX XX XX XX 1234 2L W 00000008
To install the latest version from GitHub:
library(devtools)
install_github("sthonnard/ssimparser")
library(ssimparser)
Display a sample SSIM file:
ssimparser::get_ssim_sample()
Parse the sample file into a Data Frame:
ssim_df <- ssimparser::load_ssim(ssim_file = get_ssim_sample())
print(ssim_df)
# A tibble: 3 x 15
# Rowwise:
schedule_status iata_airline flight_number service_type period_from period_to days_of_operation adep_iata ades_iata aircraft_type_iata code_sharing std_utc sta_utc adep_icao ades_icao
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dttm> <dttm> <chr> <chr>
1 C AF 4331 J 01NOV20 01DEC20 1234567 CDG ALC 320 L 2020-11-01 17:45:00 2020-11-01 19:25:00 LFPG LEAL
2 C AF 4331 J 01NOV20 22NOV20 67 CDG ALC 320 L 2020-11-01 06:00:00 2020-11-01 07:30:00 LFPG LEAL
3 C AF 1234 J 01NOV20 16NOV20 67 CDG ALC 320 L 2020-11-01 18:45:00 2020-11-01 20:25:00 LFPG LEAL
Load any file from the filesystem:
ssim_df <- ssimparser::load_ssim(ssim_file = "/tmp/ssim.txt")
Create one schedule per day of operation:
ssim_df <- ssimparser::load_ssim(ssim_file = get_ssim_sample(), unpivot_days_of_op = TRUE)
print(ssim_df)
# A tibble: 11 x 15
# Rowwise:
schedule_status iata_airline flight_number service_type period_from period_to days_of_operati… adep_iata ades_iata aircraft_type_i… code_sharing std_utc
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dttm>
1 C AF 1234 J 01NOV20 16NOV20 6 CDG ALC 320 L 2020-11-01 18:45:00
2 C AF 1234 J 01NOV20 16NOV20 7 CDG ALC 320 L 2020-11-01 18:45:00
3 C AF 4331 J 01NOV20 01DEC20 1 CDG ALC 320 L 2020-11-01 17:45:00
4 C AF 4331 J 01NOV20 01DEC20 2 CDG ALC 320 L 2020-11-01 17:45:00
5 C AF 4331 J 01NOV20 01DEC20 3 CDG ALC 320 L 2020-11-01 17:45:00
6 C AF 4331 J 01NOV20 01DEC20 4 CDG ALC 320 L 2020-11-01 17:45:00
7 C AF 4331 J 01NOV20 01DEC20 5 CDG ALC 320 L 2020-11-01 17:45:00
8 C AF 4331 J 01NOV20 01DEC20 6 CDG ALC 320 L 2020-11-01 17:45:00
9 C AF 4331 J 01NOV20 01DEC20 7 CDG ALC 320 L 2020-11-01 17:45:00
10 C AF 4331 J 01NOV20 22NOV20 6 CDG ALC 320 L 2020-11-01 06:00:00
11 C AF 4331 J 01NOV20 22NOV20 7 CDG ALC 320 L 2020-11-01 06:00:00
# … with 3 more variables: sta_utc <dttm>, adep_icao <chr>, ades_icao <chr>
Parse the SSIM file into a nested Data Frame, which can be useful for exploring the data in RStudio:
ssim_df <- ssimparser::load_ssim(ssim_file = get_ssim_sample(), nested_df = TRUE)
print(ssim_df)
# A tibble: 1 x 3
# Rowwise:
schedule_status iata_airline type3
<chr> <chr> <list>
1 C AF <tibble [3 × 13]>
print(ssim_df$type3)
# A tibble: 3 x 13
# Rowwise:
flight_number service_type period_from period_to days_of_operati… adep_iata ades_iata aircraft_type_i… code_sharing std_utc sta_utc adep_icao
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dttm> <dttm> <chr>
1 4331 J 01NOV20 01DEC20 1234567 CDG ALC 320 L 2020-11-01 17:45:00 2020-11-01 19:25:00 LFPG
2 4331 J 01NOV20 22NOV20 67 CDG ALC 320 L 2020-11-01 06:00:00 2020-11-01 07:30:00 LFPG
3 1234 J 01NOV20 16NOV20 67 CDG ALC 320 L 2020-11-01 18:45:00 2020-11-01 20:25:00 LFPG
# … with 1 more variable: ades_icao <chr>
Expand the schedules to flights and display the traffic by month and by departure airport ICAO code:
library(dplyr)
ssimparser::load_ssim(ssim_file = get_ssim_sample(), expand_sched = TRUE) %>%
group_by(month = format(flight.flight_date,"%Y-%m"), adep_icao) %>%
summarise(n = n())
# A tibble: 2 x 3
# Groups: month [2]
month adep_icao n
<chr> <chr> <int>
1 2020-11 LFPG 42
2 2020-12 LFPG 1
Expand schedules to flights for multiple SSIM files:
library(dplyr)
ssimparser::load_ssim_flights(ssim_files = c(get_ssim_sample(datefrom = as.Date("2020-11-01"), dateto = as.Date("2020-12-01")),
get_ssim_sample(datefrom = as.Date("2020-11-15"), dateto = as.Date("2020-12-01")),
get_ssim_sample(datefrom = as.Date("2020-11-10"), dateto = as.Date("2020-12-20"))
)) %>%
group_by(month = format(flight.flight_date,"%Y-%m"), adep_icao) %>%
summarise(n = n())
# A tibble: 2 x 3
# Groups: month [2]
month adep_icao n
<chr> <chr> <int>
1 2020-11 LFPG 46
2 2020-12 LFPG 20
Expand the schedules to flights and display a line graph showing the traffic per day:
library(dplyr)
library(ggplot2)
ssimparser::load_ssim(ssim_file = get_ssim_sample(), expand_sched = TRUE) %>%
group_by(flight_day = as.Date(format(flight.flight_date,"%Y-%m-%d"))) %>%
summarise(flights = n()) %>%
ggplot(aes(flight_day, flights)) + geom_line()
Display a map of the connections between airports:
library(dplyr)
library(ggplot2)
library(airportr)
ssimparser::load_ssim(ssim_file = get_ssim_sample(), expand_sched = TRUE) %>%
mutate(ap1 = min(adep_iata, ades_iata),
ap2 = max(adep_iata, ades_iata)) %>%
group_by(ap1, ap2) %>%
summarise(flights = n()) %>%
mutate(ap1_det = airportr::airport_detail(ap1),
ap2_det = airportr::airport_detail(ap2),
citypair = paste0(ap1, "-", ap2)) -> flights
citypair_plot <- rbind(data.frame(citypair = flights$citypair, x = flights$ap1_det$Longitude, y = flights$ap1_det$Latitude, size = flights$flights, name = flights$ap1_det$ICAO),
data.frame(citypair = flights$citypair, x = flights$ap2_det$Longitude, y = flights$ap2_det$Latitude, size = flights$flights, name = flights$ap2_det$ICAO))
worldmap <- rnaturalearth::ne_countries(scale = 'medium', type = 'map_units', returnclass = 'sf')
map <- sf::st_crop(worldmap, xmin = min(citypair_plot$x) - 10,
xmax = max(citypair_plot$x) + 10,
ymin = min(citypair_plot$y) - 5,
ymax = max(citypair_plot$y) + 5)
ggplot2::ggplot() +
ggplot2::geom_sf(data = map, color = "#8c8c5a", fill = "#d7d7c1") +
geom_line(aes(x=citypair_plot$x, y=citypair_plot$y, group=citypair_plot$citypair), linetype = 1, size = 1, alpha = 0.5 ) +
geom_point(aes(x=citypair_plot$x, y=citypair_plot$y), size = 2) +
geom_label(aes(x=citypair_plot$x, y=citypair_plot$y, label = citypair_plot$name)) +
ggplot2::theme_bw() +
ggplot2::labs(title = "Flights") +
ggplot2::xlab("Longitude") +
ggplot2::ylab("Latitude")