Final Report CODE

2021-09-28

1. Load packages and cleaned dataset

library(tidyverse)
library(data.table)
library(viridis)
library(ggpubr)
# Read the cleaned trip data. The path is passed as `file =` on purpose: it
# contains spaces, and a positional string with spaces that does not resolve
# to an existing file is handed to the shell as a command by fread().
all_trips_cleaned <- fread(
  file = "C:\\Users\\izzyl\\Documents\\Portfolio\\01. Cyclistic\\03. Analysis\\01-03-03 all_trips_cleaned.csv"
)

5. Weather impact

# Load raw weather data. `file =` is named explicitly because the path
# contains spaces; passed positionally, fread() would fall back to running the
# string as a shell command if the file could not be found.
raw_weather <- fread(
  file = "C:\\Users\\izzyl\\Documents\\Portfolio\\01. Cyclistic\\02. Raw Data\\2710187.csv"
)

# Collapse the weather readings to one row per date, averaging temperature,
# precipitation and wind speed while ignoring missing readings.
weather_organised <- summarise(
  group_by(raw_weather, DATE),
  ave_temp = mean(TAVG, na.rm = TRUE),
  ave_precip = mean(PRCP, na.rm = TRUE),
  ave_wind_speed = mean(AWND, na.rm = TRUE)
)

# Count the trips taken by casual riders on each date.
# Rows are narrowed to casual riders first and grouped afterwards; the
# grouping keys (and therefore the output columns) are unchanged.
casual <- all_trips_cleaned %>%
  filter(member_casual == "casual") %>%
  group_by(YMD, member_casual) %>%
  summarise(numtrips_casual = n())

# Count the trips taken by members on each date.
# Rows are narrowed to members first and grouped afterwards; the grouping
# keys (and therefore the output columns) are unchanged.
member <- all_trips_cleaned %>%
  filter(member_casual == "member") %>%
  group_by(YMD, member_casual) %>%
  summarise(numtrips_member = n())

# Join the casual and member daily counts on date (only dates present in both
# are kept), then store the date key as a character string to avoid timezone
# conversion mistakes when it is matched against the weather data.
cas_mem <- merge(x = casual, y = member, by = "YMD") %>%
  mutate(YMD = as.character(YMD))

# Give the weather data the same linking key as cas_mem: the date column is
# renamed to YMD and stored as a character string.
weather_organised <- weather_organised %>%
  rename(YMD = DATE) %>%
  mutate(YMD = as.character(YMD))

# Attach the daily weather averages to the daily trip counts, matching on the
# shared YMD date key.
merged <- merge(x = weather_organised, y = cas_mem, by = "YMD")
# Build a scatter plot of one daily weather measure against daily trip counts.
#
# The three weather plots differ only in the column drawn on the y axis, the
# title and the y-axis label, so the shared layers are defined once here.
#
# data         Data frame holding `numtrips_casual`, `numtrips_member` and the
#              column named by `weather_var`.
# weather_var  Name (character string) of the column to draw on the y axis.
# title        Plot title.
# y_label      Label for the y axis.
#
# Returns a ggplot object.
plot_weather_vs_trips <- function(data, weather_var, title, y_label) {
  ggplot(data, aes(y = .data[[weather_var]])) +
    # One point layer per rider type; the constant colour labels are what
    # produce the "Casual" / "Member" legend entries
    geom_point(aes(x = numtrips_casual, color = "Casual"), alpha = 0.5) +
    geom_point(aes(x = numtrips_member, color = "Member"), alpha = 0.5) +
    # Add title and axis labels
    labs(title = title, y = y_label, x = "Number of trips") +
    # Use viridis colour scheme
    scale_color_viridis_d() +
    # Set light theme
    theme_light() +
    # Remove legend title and center title
    theme(
      legend.title = element_blank(),
      plot.title = element_text(hjust = 0.5)
    )
}

# 1. Plot average temperature vs number of trips per day
ave_temp <- plot_weather_vs_trips(
  merged,
  weather_var = "ave_temp",
  title = "Average temperature vs no. of trips",
  y_label = "Average temperature (F)"
)

# 2. Plot average wind speed vs number of trips per day
ave_wdspd <- plot_weather_vs_trips(
  merged,
  weather_var = "ave_wind_speed",
  title = "Average wind speed vs no. of trips",
  y_label = "Average wind speed (mph)"
)

# 3. Plot average precipitation vs number of trips per day
ave_precip <- plot_weather_vs_trips(
  merged,
  weather_var = "ave_precip",
  title = "Average precipitation vs no. of trips",
  y_label = "Average precipitation (inches)"
)
  
# Arrange the three weather plots on a 2 x 2 grid that shares a single legend
# placed underneath, then display the result.
p4 <- ggarrange(
  plotlist = list(ave_temp, ave_precip, ave_wdspd),
  nrow = 2,
  ncol = 2,
  legend = "bottom",
  common.legend = TRUE
)
p4

6. Statistic summary

# Return the most frequent value of a vector (its mode).
#
# v      Vector of values.
# na.rm  If TRUE, missing values are dropped before counting, so NA can never
#        be reported as the mode. Defaults to FALSE, in which case NA is
#        counted like any other value.
#
# When several values tie, the one that appears first in `v` is returned. An
# empty atomic vector yields NA.
getmode <- function(v, na.rm = FALSE) {
  if (na.rm) {
    v <- v[!is.na(v)]
  }
  uniqv <- unique(v)
  # match() maps each element to its position in uniqv, tabulate() counts
  # those positions, and which.max() picks the first position with the
  # highest count.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

# Summarise the all_trips_cleaned dataset separately for each rider type:
# mean ride length, the most common day of week, month and time of day, and
# the mean time of day.
statistic_summary <- all_trips_cleaned %>%
  group_by(member_casual) %>%
  summarise(
    # Divided by 60 to report minutes (presumably ride_length is stored in
    # seconds; confirm against the cleaning step)
    ave_ride_length_mins = mean(ride_length, na.rm = TRUE) / 60,
    mode_day_of_week = getmode(day_of_week),
    mode_month = getmode(month),
    mode_time_of_day = getmode(ToD),
    # The mean is formatted so that only the clock time is shown
    ave_time_of_day = format(mean(ToD_convert, na.rm = TRUE), "%H:%M:%S")
  )

# kable() is called through the knitr namespace because knitr is not attached
# by the library() calls visible in this script.
knitr::kable(head(statistic_summary))
member_casual ave_ride_length_mins mode_day_of_week mode_month mode_time_of_day ave_time_of_day
casual 37.62571 Saturday 7 17:19:15 15:11:39
member 14.38970 Wednesday 8 17:20:37 14:32:12