Code Monkey home page Code Monkey logo

tidytuesday-netflix's Introduction

Netflix Titles

分析例

  • 月あたりの新作件数
  • 国あたりの作品件数

簡単のため同名別作品は区別しないものとする。

renvパッケージをインストールしてから実行すること。

install.packages("renv")

Load Data

# Build the raw-file URL of the TidyTuesday Netflix CSV (2021-04-20 edition).
csv <- paste(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday",
  "master/data/2021/2021-04-20/netflix_titles.csv",
  sep = "/"
)

# Register the URL as a pin named "netflix" (pins is used here so the
# download is cached), then read the downloaded file.
board <- pins::board_url(c(netflix = csv))
netflix <- board |>
  pins::pin_download("netflix") |>
  readr::read_csv(show_col_types = FALSE)

# Quick look at the columns
dplyr::glimpse(netflix)
## Rows: 7,787
## Columns: 12
## $ show_id      <chr> "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s1…
## $ type         <chr> "TV Show", "Movie", "Movie", "Movie", "Movie", "TV Show",…
## $ title        <chr> "3%", "7:19", "23:59", "9", "21", "46", "122", "187", "70…
## $ director     <chr> NA, "Jorge Michel Grau", "Gilbert Chan", "Shane Acker", "…
## $ cast         <chr> "João Miguel, Bianca Comparato, Michel Gomes, Rodolfo Val…
## $ country      <chr> "Brazil", "Mexico", "Singapore", "United States", "United…
## $ date_added   <chr> "August 14, 2020", "December 23, 2016", "December 20, 201…
## $ release_year <dbl> 2020, 2016, 2011, 2009, 2008, 2016, 2019, 1997, 2019, 200…
## $ rating       <chr> "TV-MA", "TV-MA", "R", "PG-13", "PG-13", "TV-MA", "TV-MA"…
## $ duration     <chr> "4 Seasons", "93 min", "78 min", "80 min", "123 min", "1 …
## $ listed_in    <chr> "International TV Shows, TV Dramas, TV Sci-Fi & Fantasy",…
## $ description  <chr> "In a future where the elite inhabit an island paradise f…

Number of New Titles Per Month

集計

# Number of titles first added in each month, from 2015 onwards.
# Titles sharing a name are treated as one title; its earliest month is used.
added_per_month <- netflix |>
  # Keep only the title and the month in which it was added
  dplyr::transmute(
    title,
    month_added = date_added |>
      lubridate::mdy() |>                   # parse the character date into a Date
      lubridate::floor_date(unit = "month") # snap to the 1st so rows bucket by month
  ) |>
  # Drop rows without a usable date *before* aggregating: otherwise a single
  # missing date makes min() return NA and the whole title is silently lost
  dplyr::filter(!is.na(month_added)) |>
  # Earliest month per title
  dplyr::group_by(title) |>
  dplyr::summarize(month_added = min(month_added)) |>
  # Keep titles added in 2015 or later
  dplyr::filter(lubridate::year(month_added) >= 2015) |>
  # Rows per month = titles per month
  dplyr::count(month_added)

dplyr::glimpse(added_per_month)
## Rows: 73
## Columns: 2
## $ month_added <date> 2015-01-01, 2015-02-01, 2015-03-01, 2015-04-01, 2015-05-0…
## $ n           <int> 1, 4, 5, 5, 7, 7, 8, 2, 7, 15, 4, 23, 44, 16, 17, 22, 13, …

可視化

# Heat strip: one tile per month, coloured by the number of new titles.
ggplot2::ggplot(
  added_per_month,
  ggplot2::aes(x = month_added, y = 1, fill = n)
) +
  # Constant-height tiles; the width is chosen so that no gaps appear
  # between neighbouring months
  ggplot2::geom_tile(width = 33) +
  # "NETFLIX" in white letters, anchored to the top-left corner of the panel
  ggplot2::annotate(
    geom = "text", label = "NETFLIX", color = "white", size = 20,
    x = as.Date(-Inf), y = Inf, hjust = -0.01, vjust = 1.1
  ) +
  # Black-to-red fill (Netflix colours), no legend title;
  # lower limit fixed at 0, upper limit taken from the data
  ggplot2::scale_fill_gradient(
    name = NULL,
    low = "black",
    high = "red",
    limits = c(0L, NA_integer_)
  ) +
  # One tick per year on the x axis
  ggplot2::scale_x_date(date_breaks = "1 years", date_labels = "%Y") +
  # Plot title
  ggplot2::labs(title = "Number of New Titles Per Month") +
  # Remove the padding around the data
  ggplot2::coord_cartesian(expand = FALSE) +
  # Minimal look: months without titles have no tile, so paint the panel
  # black; drop grid lines, axis titles and the meaningless y-axis labels
  ggplot2::theme_minimal() +
  ggplot2::theme(
    panel.background = ggplot2::element_rect(fill = "black"),
    panel.grid = ggplot2::element_blank(),
    axis.title = ggplot2::element_blank(),
    axis.text.y = ggplot2::element_blank()
  )

Number of Titles Per Country

集計

# Number of distinct titles for each raw value of the `country` column.
titles_per_country <- netflix |>
  # Collapse repeated (title, country) pairs into a single row
  dplyr::distinct(title, country) |>
  # Tally rows per country; count() on grouped input keeps the result
  # grouped by country
  dplyr::group_by(country) |>
  dplyr::count()

dplyr::glimpse(titles_per_country)
## Rows: 682
## Columns: 2
## Groups: country [682]
## $ country <chr> "Argentina", "Argentina, Brazil, France, Poland, Germany, Denm…
## $ n       <int> 50, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 83, 1, 2, 1, 1, 1, 1, 1, …

country列には"Argentina, Brazil, France, Poland, Germany, Denmark"などと、カンマ区切りで複数の国が記録されている点に注意。

地図データとのマージ

地図データ読み込み。

region列が集計結果のcountry列に相当する。

# World map outline data; its `region` column holds the country names
world <- ggplot2::map_data(map = "world")
dplyr::glimpse(world)
## Rows: 99,338
## Columns: 6
## $ long      <dbl> -69.89912, -69.89571, -69.94219, -70.00415, -70.06612, -70.0…
## $ lat       <dbl> 12.45200, 12.42300, 12.43853, 12.50049, 12.54697, 12.59707, …
## $ group     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ order     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 1…
## $ region    <chr> "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba…
## $ subregion <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …

集計結果と地図データのマージ。

地図に合わせて集計結果のcountry列を整形し、再集計してからマージする。

# Per-country title counts joined onto the world map data.
# The raw `country` values are reshaped to one country per row and renamed to
# the spellings used by the map before the counts are re-aggregated.
titles_per_region <- titles_per_country |>
  # The input is still grouped by country; drop the grouping before the
  # grouping column is renamed and split
  dplyr::ungroup() |>
  # Use the map data's column name
  dplyr::rename(region = country) |>
  # A cell may list several comma-separated countries: one row per country.
  # The pattern tolerates missing or repeated whitespace after the comma.
  tidyr::separate_rows("region", sep = ",\\s*") |>
  # Align the spellings with the map data, which names these regions
  # "USA" and "UK"; without this the two countries never match in the join
  # NOTE(review): other names may also differ from the map data; verify
  # with dplyr::anti_join() against `world` if full coverage matters
  dplyr::mutate(
    region = dplyr::recode(
      trimws(region),
      "United States" = "USA",
      "United Kingdom" = "UK"
    )
  ) |>
  # Discard missing countries and empty pieces left by stray commas
  dplyr::filter(!is.na(region), region != "") |>
  # Splitting creates duplicate country names, so sum the counts per country
  dplyr::group_by(region) |>
  dplyr::summarize(n = sum(n)) |>
  # Attach the counts to every row of the map data
  dplyr::right_join(world, by = "region")

dplyr::glimpse(titles_per_region)
## Rows: 99,338
## Columns: 7
## $ region    <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ n         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ long      <dbl> 74.89131, 74.84023, 74.76738, 74.73896, 74.72666, 74.66895, …
## $ lat       <dbl> 37.23164, 37.22505, 37.24917, 37.28564, 37.29072, 37.26670, …
## $ group     <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ order     <int> 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, …
## $ subregion <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …

可視化

# Choropleth: each country is filled according to its number of titles.
ggplot2::ggplot(titles_per_region) +
  # Draw the map, filling every region by its title count
  ggplot2::geom_map(
    ggplot2::aes(long, lat, map_id = region, fill = n),
    map = world
  ) +
  # Fill scale
  ggplot2::scale_fill_gradient(
    name = NULL,                 # no legend title
    low = "black", high = "red", # Netflix colours
    trans = "log",               # counts span a wide range, so use a log scale
    limits = c(1L, NA_integer_), # lower limit is one title
    labels = round               # integer legend labels; argument name spelled
                                 # out instead of relying on partial matching
  ) +
  # Plot title
  ggplot2::labs(title = "NETFLIX: Number of Titles Per Country") +
  # Same scale on both axes, since this is a map
  ggplot2::coord_equal() +
  # Strip axes and background
  ggplot2::theme_void()

tidytuesday-netflix's People

Contributors

atusy avatar

Stargazers

Soogie Kawacub avatar  avatar

Watchers

 avatar

tidytuesday-netflix's Issues

タイトル中の単語と動画の種類の相関は日本ではどんな感じだろ?

TV showとMovieのどちらに属するか、どの単語が指標になるかを分析した例がある。

https://twitter.com/juliasilge/status/1385734573951094785

日本語はまた違った傾向を示しそうだ。

なお、タイトルはローマ字で分かち書きされてるっぽい。

# Raw-file URL of the TidyTuesday Netflix CSV (2021-04-20 edition)
csv <- paste(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday",
  "master/data/2021/2021-04-20/netflix_titles.csv",
  sep = "/"
)

netflix <- readr::read_csv(file = csv, show_col_types = FALSE)

# Titles whose country field is exactly "Japan"
netflix |>
  dplyr::filter(country %in% "Japan") |>
  dplyr::pull("title")
#>   [1] "​SAINT SEIYA: Knights of the Zodiac"                        
#>   [2] "37 Seconds"                                                
#>   [3] "A Silent Voice"                                            
#>   [4] "A Whisker Away"                                            
#>   [5] "A.I.C.O."                                                  
#>   [6] "Aggretsuko"                                                
#>   [7] "Aggretsuko: We Wish You a Metal Christmas"                 
#>   [8] "Ainori Love Wagon: African Journey"                        
#>   [9] "Ainori Love Wagon: Asian Journey"                          
#>  [10] "AJIN: Demi-Human"                                          
#>  [11] "Akame ga Kill!"                                            
#>  [12] "Alice in Borderland"                                       
#>  [13] "Angel Beats!"                                              
#>  [14] "Anohana: The Flower We Saw That Day"                       
#>  [15] "忍者ハットリくん"                                          
#>  [16] "ARASHI's Diary -Voyage-"                                   
#>  [17] "Atelier"                                                   
#>  [18] "Attack on Titan"                                           
#>  [19] "B: The Beginning"                                          
#>  [20] "Back Street Girls -GOKUDOLS-"                              
#>  [21] "BAKI"                                                      
#>  [22] "BEASTARS"                                                  
#>  [23] "Berserk: The Golden Age Arc I - The Egg of the King"       
#>  [24] "Berserk: The Golden Age Arc II - The Battle for Doldrey"   
#>  [25] "Berserk: The Golden Age Arc III - The Advent"              
#>  [26] "Beyblade Burst"                                            
#>  [27] "Beyblade Burst Rise"                                       
#>  [28] "Beyblade: Metal Fusion"                                    
#>  [29] "Black Butler"                                              
#>  [30] "BLAME!"                                                    
#>  [31] "Blazing Transfer Students"                                 
#>  [32] "Bleach"                                                    
#>  [33] "Bleach The Movie: Fade to Black"                           
#>  [34] "Bleach the Movie: Hell Verse"                              
#>  [35] "Blue Exorcist"                                             
#>  [36] "BNA"                                                       
#>  [37] "Cagaster of an Insect Cage"                                
#>  [38] "Cardcaptor Sakura"                                         
#>  [39] "CAROLE & TUESDAY"                                          
#>  [40] "Case Closed"                                               
#>  [41] "Cells at Work!"                                            
#>  [42] "Children of the Sea"                                       
#>  [43] "Children of the Whales"                                    
#>  [44] "CLANNAD"                                                   
#>  [45] "Code Geass: Lelouch of the Rebellion"                      
#>  [46] "Cyborg 009: Call of Justice"                               
#>  [47] "Devilman Crybaby"                                          
#>  [48] "Dinosaur King"                                             
#>  [49] "Dorohedoro"                                                
#>  [50] "DRAGON PILOT: Hisone & Masotan"                            
#>  [51] "Dragon Quest Your Story"                                   
#>  [52] "DRIFTING DRAGONS"                                          
#>  [53] "DTC Yukemuri Junjo Hen From High & Low"                    
#>  [54] "Durarara!!"                                                
#>  [55] "Erased"                                                    
#>  [56] "EVANGELION: DEATH (TRUE)²"                                 
#>  [57] "Expelled from Paradise"                                    
#>  [58] "Fairy Tail"                                                
#>  [59] "Fate/Apocrypha"                                            
#>  [60] "Fate/EXTRA Last Encore"                                    
#>  [61] "Fate/Grand Order -First Order-"                            
#>  [62] "Fate/stay night: Unlimited Blade Works"                    
#>  [63] "Fate/Zero"                                                 
#>  [64] "FINAL FANTASY XIV Dad of Light"                            
#>  [65] "Fireworks"                                                 
#>  [66] "Followers"                                                 
#>  [67] "Food Wars!: Shokugeki no Soma"                             
#>  [68] "Forest of Piano"                                           
#>  [69] "FullMetal Alchemist"                                       
#>  [70] "Fullmetal Alchemist: Brotherhood"                          
#>  [71] "GANTZ:O"                                                   
#>  [72] "Gargantia on the Verdurous Planet"                         
#>  [73] "Ghost Pain"                                                
#>  [74] "Ghost Tears"                                               
#>  [75] "Ghost Whispers"                                            
#>  [76] "Girls und Panzer"                                          
#>  [77] "Girls und Panzer der Film"                                 
#>  [78] "Glitter Force"                                             
#>  [79] "Glitter Force Doki Doki"                                   
#>  [80] "God Eater"                                                 
#>  [81] "Godzilla"                                                  
#>  [82] "GODZILLA City on the Edge of Battle"                       
#>  [83] "GODZILLA The Planet Eater"                                 
#>  [84] "Golden Time"                                               
#>  [85] "Good Morning Call"                                         
#>  [86] "GRANBLUE FANTASY the Animation"                            
#>  [87] "Great Pretender"                                           
#>  [88] "Gunslinger Girl"                                           
#>  [89] "Gurren Lagann"                                             
#>  [90] "Haikyu!!"                                                  
#>  [91] "Hi Score Girl"                                             
#>  [92] "Hibana: Spark"                                             
#>  [93] "High & Low The Movie"                                      
#>  [94] "High & Low The Movie 2 / End of Sky"                       
#>  [95] "High & Low The Movie 3 / Final Mission"                    
#>  [96] "High & Low The Red Rain"                                   
#>  [97] "Hikaru Utada Laughter in the Dark Tour 2018"               
#>  [98] "Hot Gimmick: Girl Meets Boy"                               
#>  [99] "Hunter X Hunter (2011)"                                    
#> [100] "ID-0"                                                      
#> [101] "In This Corner of the World"                               
#> [102] "Ingress: The Animation"                                    
#> [103] "InuYasha"                                                  
#> [104] "Inuyasha the Movie - L'isola del fuoco scarlatto"          
#> [105] "Inuyasha the Movie - La spada del dominatore del mondo"    
#> [106] "InuYasha the Movie: Affections Touching Across Time"       
#> [107] "InuYasha: The Movie 2: The Castle Beyond the Looking Glass"
#> [108] "Is It Wrong to Try to Pick Up Girls in a Dungeon?"         
#> [109] "Japan Sinks: 2020"                                         
#> [110] "Japanese Style Originator"                                 
#> [111] "Jimmy: The True Story of a True Idiot"                     
#> [112] "JoJo's Bizarre Adventure"                                  
#> [113] "JU-ON: Origins"                                            
#> [114] "K"                                                         
#> [115] "K-On!"                                                     
#> [116] "K-on! the movie"                                           
#> [117] "Kabaneri of the Iron Fortress: The Battle of Unato"        
#> [118] "Kantaro: The Sweet Tooth Salaryman"                        
#> [119] "Kill la Kill"                                              
#> [120] "Knights of Sidonia"                                        
#> [121] "Kuroko's Basketball"                                       
#> [122] "Kuromukuro"                                                
#> [123] "LAST HOPE"                                                 
#> [124] "Levius"                                                    
#> [125] "Little Witch Academia"                                     
#> [126] "LOST SONG"                                                 
#> [127] "Love and Fortune"                                          
#> [128] "Love and Hong Kong"                                        
#> [129] "Love, Chunibyo & Other Delusions!"                         
#> [130] "Lu Over the Wall"                                          
#> [131] "Lupin the 3rd: The Castle of Cagliostro: Special Edition"  
#> [132] "Magi: Adventure of Sinbad"                                 
#> [133] "Magi: The Labyrinth of Magic"                              
#> [134] "Maid-Sama!"                                                
#> [135] "March Comes in Like a Lion"                                
#> [136] "Marvel Anime: X-Men"                                       
#> [137] "Mary and the Witch's Flower"                               
#> [138] "Megalobox"                                                 
#> [139] "Midnight Diner"                                            
#> [140] "Midnight Diner: Tokyo Stories"                             
#> [141] "Million Yen Women"                                         
#> [142] "Mirai"                                                     
#> [143] "Miss Hokusai"                                              
#> [144] "Mob Psycho 100"                                            
#> [145] "Mobile Suit Gundam UC"                                     
#> [146] "Mobile Suit Gundam: Iron-Blooded Orphans"                  
#> [147] "Modest Heroes: Ponoc Short Films Theatre"                  
#> [148] "Monthly Girls' Nozaki Kun"                                 
#> [149] "MOTHER"                                                    
#> [150] "My Husband Won't Fit"                                      
#> [151] "Nagi-Asu: A Lull in the Sea"                               
#> [152] "Naruto"                                                    
#> [153] "Naruto Shippûden the Movie: Bonds"                         
#> [154] "Naruto Shippûden the Movie: The Will of Fire"              
#> [155] "Naruto Shippuden : Blood Prison"                           
#> [156] "Naruto Shippuden: The Movie"                               
#> [157] "Naruto Shippuden: The Movie: The Lost Tower"               
#> [158] "Naruto the Movie 2: Legend of the Stone of Gelel"          
#> [159] "Naruto the Movie 3: Guardians of the Crescent Moon Kingdom"
#> [160] "Naruto the Movie: Ninja Clash in the Land of Snow"         
#> [161] "Neon Genesis Evangelion"                                   
#> [162] "Ninja Hattori"                                             
#> [163] "NiNoKuni"                                                  
#> [164] "No Game No Life"                                           
#> [165] "No Game No Life: Zero"                                     
#> [166] "Nura: Rise of the Yokai Clan: Demon Capital"               
#> [167] "Okko's Inn"                                                
#> [168] "ONE PIECE"                                                 
#> [169] "One-Punch Man"                                             
#> [170] "Ouran High School Host Club"                               
#> [171] "Parasyte: The Maxim"                                       
#> [172] "Pokémon the Movie: I Choose You!"                          
#> [173] "Pokémon the Movie: Power of Us"                            
#> [174] "Pokémon: Indigo League"                                    
#> [175] "Pokémon: Mewtwo Strikes Back - Evolution"                  
#> [176] "Pop Team Epic"                                             
#> [177] "Puella Magi Madoka Magica"                                 
#> [178] "Re:Mind"                                                   
#> [179] "REA(L)OVE"                                                 
#> [180] "Record of Grancrest War"                                   
#> [181] "Revisions"                                                 
#> [182] "River's Edge"                                              
#> [183] "Road To High & Low"                                        
#> [184] "Romance Doll"                                              
#> [185] "Rurouni Kenshin"                                           
#> [186] "Saint Seiya"                                               
#> [187] "Saint Seiya: The Lost Canvas"                              
#> [188] "Samurai Gourmet"                                           
#> [189] "SCAMS"                                                     
#> [190] "Sirius the Jaeger"                                         
#> [191] "Smoking"                                                   
#> [192] "Sol Levante"                                               
#> [193] "Sonic X"                                                   
#> [194] "Soul Eater"                                                
#> [195] "Stay Tuned!"                                               
#> [196] "Switched"                                                  
#> [197] "Sword Art Online"                                          
#> [198] "Sword Art Online Alternative: Gun Gale Online"             
#> [199] "SWORDGAI The Animation"                                    
#> [200] "Teasing Master Takagi-san"                                 
#> [201] "Terrace House: Boys & Girls in the City"                   
#> [202] "Terrace House: Opening New Doors"                          
#> [203] "Terrace House: Tokyo 2019-2020"                            
#> [204] "The Could’ve-Gone-All-the-Way Committee"                   
#> [205] "The Devil Is a Part-Timer!"                                
#> [206] "The Disastrous Life of Saiki K."                           
#> [207] "The Disastrous Life of Saiki K.: Reawakened"               
#> [208] "The End of Evangelion"                                     
#> [209] "The Forest of Love"                                        
#> [210] "The Forest of Love: Deep Cut"                              
#> [211] "The Garden of Words"                                       
#> [212] "The Irregular at Magic High School"                        
#> [213] "The Many Faces of Ito"                                     
#> [214] "The Naked Director"                                        
#> [215] "The Promised Neverland"                                    
#> [216] "The Seven Deadly Sins"                                     
#> [217] "The Seven Deadly Sins the Movie: Prisoners of the Sky"     
#> [218] "Toradora!"                                                 
#> [219] "TUNA GIRL"                                                 
#> [220] "Ultramarine Magmell"                                       
#> [221] "Vampire Knight"                                            
#> [222] "Violet Evergarden"                                         
#> [223] "Violet Evergarden: Eternity and the Auto Memory Doll"      
#> [224] "Your lie in April"                                         
#> [225] "Yu-Gi-Oh!"                                                 
#> [226] "Zoids Wild"

Created on 2021-12-03 by the reprex package (v2.0.1)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games that makes everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.