1 Loading the network

The network is generated from merging the characters.csv file, which describes each of the characters in the database, with situations.csv, which describes each “machine vision situation” we identified in the 500 creative works (novels, movies, videogames and artworks). We are only using the information about what characters do with machine vision technologies, and leave the other information (what is the technology doing, what are entities like governments or law enforcement doing) alone.

This gives us a dataframe we’ll call Character_verbs with a list of each character, their traits (gender, species, race, age and sexuality), and verbs describing the actions they take when interacting with machine vision. The verbs are either active (-ing verbs) or passive (-ed verbs) so we’ll add a TRUE/FALSE column called VerbActive.


# Import the characters file (../data/Characters.csv) and declare the type of
# every column up front. The trait columns are read as factors with an explicit
# list of levels; IsGroup and IsCustomizable are read as logicals.
# NOTE(review): the final "Unknown" level on Gender, Age and Sexuality is
# reconstructed to mirror Species and RaceOrEthnicity (and the later
# "Unknown" -> NA step) -- confirm against the CSV.
AllCharacters <- read_csv(
  "../data/Characters.csv",
  col_types = cols(
    CharacterID = col_integer(),
    Character = col_character(),
    Species = col_factor(levels = c(
      "Animal", "Cyborg", "Fictional",
      "Human", "Machine", "Unknown"
    )),
    Gender = col_factor(levels = c(
      "Female", "Male", "Non-binary or Other", "Trans Woman",
      "Unknown"
    )),
    RaceOrEthnicity = col_factor(levels = c(
      "Asian", "Black", "White", "Person of Colour", "Immigrant", "Indigenous",
      "Complex", "Unknown"
    )),
    Age = col_factor(levels = c(
      "Child", "Young Adult", "Adult", "Elderly",
      "Unknown"
    )),
    Sexuality = col_factor(levels = c(
      "Homosexual", "Heterosexual", "Bi-sexual", "Other",
      "Unknown"
    )),
    IsGroup = col_logical(),
    IsCustomizable = col_logical()
  )
)

# Define Characters as the subset of AllCharacters that are not group characters or 
# customizable characters.
# Convert "Unknown" values to NA. 
# Simplify Species to just three categories: Human, Robot (machines and cyborgs) or Other (or NA)
# Simplify RaceOrEthnicity to just two options, White, PoC - or NA.
# NB: There are potential issues about simplifying race/ethnicity like this, 
# but given how messy our original categories are, and how few occurrences 
# there are of some, this seems the tidiest solution. The most problematic aspect 
# is that it merges Asian characters in with other PoC, but actually many 
# (most?) of the Asian characters are in works set in Asia so they are not 
# minority characters. Will write more about ethical aspects of trying to 
# count race at all elsewhere - note that this is a characteristic of the 
# REPRESENTATION of race/ethnicity, not "real" categories or descriptions of 
# real people. Use with caution.
# Select relevant columns.

# Build the per-character trait table used for the network.
#  1. Keep individual characters only (drop group and customizable characters).
#  2. Turn every "Unknown" into NA. This is done column by column on the factor
#     and character columns: piping the whole data frame into na_if("Unknown")
#     is rejected by dplyr >= 1.1.0, which requires `y` to be castable to the
#     type of `x`.
#  3. Collapse Species into Human / Robot (Machine + Cyborg) / Other.
#  4. Collapse RaceOrEthnicity into a new Race column with White / PoC.
#  5. Keep only the name and trait columns.
Characters <- AllCharacters %>%
  filter(!IsGroup, !IsCustomizable) %>%
  mutate(across(
    where(function(col) is.factor(col) || is.character(col)),
    function(col) replace(col, col == "Unknown", NA)
  )) %>%
  mutate(
    Species = recode(
      Species,
      "Machine" = "Robot",
      "Cyborg" = "Robot",
      "Human" = "Human",
      .default = "Other"
    ),
    Race = recode(
      RaceOrEthnicity,
      "Asian" = "PoC",
      "Black" = "PoC",
      "White" = "White",
      "Person of Colour" = "PoC",
      "Indigenous" = "PoC",
      "Immigrant" = "PoC",
      "Complex" = "PoC"
    )
  ) %>%
  select(Character, Species, Gender, Sexuality, Race, Age)

# To figure out what these characters actually do with the machine vision we need 
# to load data about the Situations in which they interact with machine vision
# technologies in the creative works in our sample.
# The following code imports data about the Situations from situations.csv, 
# sets the column types, and also tells R to skip the columns we’re not going 
# to need for this analysis.

# NB characterID isn't in this export, fix later (add column back in)
# Also remove GenreID later

# Read the situations export, keeping only the columns this analysis uses;
# col_skip() drops the others at import time.
# NOTE(review): the file path is reconstructed from the surrounding text
# ("situations.csv", alongside ../data/Characters.csv) -- confirm the real
# location and capitalisation.
Situations <- read_csv(
  "../data/situations.csv",
  col_types = cols(
    SituationID = col_integer(),
    Situation = col_skip(),
    Genre = col_character(),
    GenreID = col_skip(),
    Character = col_character(),
    Entity = col_skip(),
    Technology = col_skip(),
    Verb = col_character()
  )
)

# Filter just the three main genres - since narratives have subgenres (Movie, 
# Novel, etc) there would be a lot of duplicate info if we kept them.

# The fifth row has an NA in the Character and CharacterID columns - that 
# means that there is no value there. The verb in the verb column belongs 
# to an Entity or a Technology, not to a Character. We need to delete all the 
# rows with missing data.

# Discard every row whose Character is missing (replace w/CharID later).
Situations <- Situations %>% drop_na(Character)

# Now we combine the two dataframes using the CharacterID (for now: Character) 
# column as the shared information.
# Join each situation row to the traits of the character acting in it.
# merge() with its defaults keeps only the rows whose Character value is
# present in both data frames. Replace the key with CharID later.
Character_verbs <- Situations %>%
  merge(Characters, by = "Character") %>%
  select(
    Character, SituationID, Genre, Verb,
    Species, Gender, Race, Age, Sexuality
  )

# One row per character-verb occurrence, restricted to frequently used verbs.
# VerbCount is the number of rows sharing the same Verb; rows are sorted with
# the most frequent verbs first.
nodes_and_edges <- Character_verbs %>%
  add_count(Verb, name = "VerbCount") %>%
  arrange(desc(VerbCount)) %>%
  filter(VerbCount > 20) %>% # remove verbs that aren't used much
  # Active verbs are the -ing forms. The pattern is anchored to a word end so
  # that a verb merely containing "ing" somewhere inside it is not flagged.
  mutate(VerbActive = str_detect(Verb, "ing\\b"))

# the mutate() creates a new col TRUE if -ing verb

# TODO: Add in Genre again when we have a new Situations export without duplicate genres for U. 

# Character vertices: one row per character with its traits. The verb-only
# columns are filled with NA so the table lines up with the verb vertices.
# distinct() collapses the repeated rows (a character appears once per action
# in nodes_and_edges); the vertex table handed to igraph must not repeat a
# name.
# NOTE(review): two different characters sharing a name but not traits would
# still yield duplicate Titles -- revisit once CharacterID is available.
nodes_chars <- nodes_and_edges %>%
  select(Title = Character, Species, Gender, Race, Age, Sexuality) %>%
  add_column(
    NodeType = "Character",
    VerbCount = NA,
    VerbActive = NA
  ) %>%
  select(
    Title, NodeType, Species, Gender, Race, Age, Sexuality,
    VerbCount, VerbActive
  ) %>%
  distinct()

# Verb vertices: one row per verb with its count and active/passive flag. The
# character-only trait columns are filled with NA so the table lines up with
# the character vertices.
# distinct() collapses the repeated rows (a verb appears once per occurrence
# in nodes_and_edges); the vertex table handed to igraph must not repeat a
# name.
nodes_verbs <- nodes_and_edges %>%
  select(Title = Verb, VerbActive, VerbCount) %>%
  add_column(
    NodeType = "Verb",
    Species = NA,
    Gender = NA,
    Race = NA,
    Age = NA,
    Sexuality = NA
  ) %>%
  select(
    Title, NodeType, Species, Gender,
    Race, Age, Sexuality, VerbCount, VerbActive
  ) %>%
  distinct()

# Vertex list: character rows first, then verb rows.
nodes <- rbind(nodes_chars, nodes_verbs)

# Edge list: one edge per row of nodes_and_edges, pointing from the character
# to the verb, all tagged with the same edge type.
edges <- nodes_and_edges %>%
  transmute(
    From = Character,
    To = Verb,
    EdgeType = "Character_action"
  )

# Create a directed graph object from the edge and vertex data frames.
# NOTE(review): no random seed is set here; if a reproducible layout is
# wanted, call set.seed() before plotting.
net <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE)

This is a bipartite network - there are two types of nodes, Characters and Verbs. We’ll add a column called “type” that will be TRUE for one kind of node and FALSE for the other kind.

## Store the bipartite side of every vertex in a logical "type" attribute.
net <- set_vertex_attr(net, "type", value = bipartite_mapping(net)$type)

# Vertices whose type is TRUE are drawn light blue, the others salmon;
# every edge is light gray.
net <- set_vertex_attr(
  net, "color",
  value = ifelse(vertex_attr(net, "type"), "lightblue", "salmon")
)
net <- set_edge_attr(net, "color", value = "lightgray")

# Node size follows the in-degree (number of incoming links).
# The 0.3 factor scales all nodes; change it to make them larger or smaller.
# NOTE(review): edges run from characters to verbs, so characters presumably
# have in-degree 0 and therefore size 0 -- confirm this is intended.
deg <- degree(net, mode = "in")
net <- set_vertex_attr(net, "size", value = 0.3 * deg)

# Verb nodes get their name as a label; character nodes get an empty label.
# The label is read from the graph's own vertex names (the Title column the
# graph was built from) rather than from the separate `nodes` data frame, so
# it does not depend on `nodes` still being in the same row order as the
# vertices.
V(net)$label <- ifelse(V(net)$NodeType == "Verb", V(net)$name, "")

     edge.arrow.size = 0.2, 
     label.dist = 1,