The MultiNarrative Analysis tool should now be fully functional, allowing you to:

Upload JSON files containing narrative text
Analyze the narrative structure (timeline, plotline, and storyline)
Visualize the results with interactive charts
Generate and download a comprehensive PDF report

The GUI will:
Validate JSON format and provide specific error messages
Check if the JSON contains actual narrative text
Provide more detailed feedback at each step of the analysis
Include a help button that explains the expected file format with examples
The tool should be user-friendly and help users understand what went wrong if they upload an incorrect file format.

If you want to test it with a sample narrative, you can create a simple JSON file with this structure:

{
  "title": "The Three Little Pigs",
  "text": "Once upon a time, there were three little pigs. The first pig built a house of straw. The second pig built a house of sticks. The third pig built a house of bricks. One day, a big bad wolf came to the first pig's house. He huffed and puffed and blew the house down. The first pig ran to the second pig's house. The wolf followed and blew down the second house too. Both pigs ran to the third pig's house. The wolf tried to blow down the brick house, but he couldn't. He tried to enter through the chimney, but the third pig had a pot of boiling water. The wolf fell into the water and ran away. The three pigs lived happily ever after."
}

First, let's run the imports and setup:

In [16]:
# Install required packages
import sys
import subprocess
import pkg_resources

# Distributions (pip names) that the rest of the notebook imports.
required_packages = [
    'pandas', 'numpy', 'matplotlib', 'seaborn', 'networkx',
    'ipywidgets', 'nltk', 'spacy', 'fpdf', 'pillow',
]

# Keys of every distribution visible to the running interpreter.
installed = set(dist.key for dist in pkg_resources.working_set)
missing = [name for name in required_packages if name.lower() not in installed]

if missing:
    print(f"Installing missing packages: {missing}")
    # Use the running interpreter so packages land in the active environment.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', *missing])

    # The spaCy model is installed separately, right after a fresh spaCy install.
    if 'spacy' in missing:
        subprocess.check_call([sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'])

# Now import all required packages
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output, FileLink
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import spacy
import re
from datetime import datetime
import io
from fpdf import FPDF
import base64
from PIL import Image
import tempfile
import os

# Download necessary NLTK resources.
# Each resource is checked on its own so that one missing package does not
# trigger a re-download of the ones that are already installed.
for _resource_path, _package in (
    ('tokenizers/punkt', 'punkt'),
    ('corpora/stopwords', 'stopwords'),
    ('corpora/wordnet', 'wordnet'),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package)

# Load spaCy model
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # The model is not installed yet: download it with the interpreter that
    # is running this code, then retry. subprocess is used instead of the
    # IPython "!" shell magic so the cell is also valid plain Python.
    print("Downloading spaCy model...")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'])
    nlp = spacy.load("en_core_web_sm")


[nltk_data] Downloading package punkt to /Users/sawp33/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sawp33/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/sawp33/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


The next block adds loaders for additional input file types (TXT, DOCX, and VTT) alongside JSON.

In [17]:
# First, let's add the necessary imports for handling different file formats
import json
import re
import io
import os
import tempfile
from datetime import datetime

# python-docx is optional: record whether it could be imported so the DOCX
# loader can be switched off instead of failing at call time.
try:
    import docx
except ImportError:
    DOCX_AVAILABLE = False
    print("Warning: python-docx package not installed. DOCX file support is disabled.")
    print("To enable DOCX support, run: !pip install python-docx")
else:
    DOCX_AVAILABLE = True

# Add these methods to the MultiNarrativeAnalyzer class

def load_file(self, file_content, file_name, auto_split=False):
    """
    Load narrative data from an uploaded file, dispatching on its extension

    Parameters:
    - file_content: The content of the file as bytes
    - file_name: The name of the file (its extension selects the loader)
    - auto_split: Whether to automatically split long narratives

    Returns:
    - List of narrative IDs; an empty list when the format is unsupported,
      the bytes are not valid UTF-8, or the selected loader finds nothing
    """
    # splitext (unlike split('.')) yields no extension for a name such as
    # "json", so an extension-less file is not mistaken for a known format.
    file_extension = os.path.splitext(file_name)[1].lstrip('.').lower()

    # DOCX is a binary format and is handed over as raw bytes.
    if file_extension == 'docx':
        if DOCX_AVAILABLE:
            return self.load_docx(file_content, file_name, auto_split)
        print("DOCX support is not available. Please install python-docx package.")
        return []

    if file_extension not in ('json', 'txt', 'vtt'):
        print(f"Unsupported file format: '{file_name}'. "
              "Supported formats: JSON, TXT, DOCX, VTT.")
        return []

    try:
        # 'utf-8-sig' also strips a leading byte-order mark, which would
        # otherwise make json.loads reject the document.
        text = file_content.decode('utf-8-sig')
    except UnicodeDecodeError as e:
        print(f"Could not decode '{file_name}' as UTF-8: {e}")
        return []

    if file_extension == 'json':
        return self.load_data(text, auto_split)
    if file_extension == 'txt':
        return self.load_txt(text, file_name, auto_split)
    return self.load_vtt(text, file_name, auto_split)

def load_txt(self, text_content, file_name, auto_split=False):
    """
    Load narrative data from plain text

    The whole text becomes one narrative whose title is the file name
    without its extension. The new narrative (or, after an auto-split, its
    first part) becomes the current narrative.

    Parameters:
    - text_content: The content of the text file
    - file_name: The name of the file
    - auto_split: Whether to automatically split narratives longer than
      1000 words

    Returns:
    - List of narrative IDs
    """
    # IDs are numbered from the current narrative count; skip forward if that
    # number is already taken so an existing narrative is never overwritten.
    next_number = len(self.narratives) + 1
    while str(next_number) in self.narratives:
        next_number += 1
    narrative_id = str(next_number)
    title = os.path.splitext(file_name)[0]

    # Create the narrative structure
    self.narratives[narrative_id] = {
        "id": narrative_id,
        "title": title,
        "text": text_content,
        "processed_text": "",
        "sentences": [],
        "events": [],
        "timeline": [],
        "plotline": {},
        "storyline": {}
    }

    # Set as current narrative
    self.current_narrative_id = narrative_id

    # Auto-split if needed
    if auto_split and len(text_content.split()) > 1000:
        chunk_ids = self._split_narrative(narrative_id)
        if chunk_ids:
            # Splitting removes the original entry, so the current narrative
            # must point at a part that still exists.
            self.current_narrative_id = chunk_ids[0]
            return chunk_ids

    return [narrative_id]

def load_docx(self, file_content, file_name, auto_split=False):
    """
    Load narrative data from DOCX file

    The text of every non-blank paragraph is joined with newlines and handed
    to load_txt.

    Parameters:
    - file_content: The content of the DOCX file as bytes
    - file_name: The name of the file
    - auto_split: Whether to automatically split long narratives

    Returns:
    - List of narrative IDs; an empty list when python-docx is unavailable
      or the document cannot be read
    """
    if not DOCX_AVAILABLE:
        print("DOCX support is not available. Please install python-docx package.")
        return []

    try:
        # python-docx accepts a file-like object, so the bytes are parsed in
        # memory; no temporary file has to be created or cleaned up.
        doc = docx.Document(io.BytesIO(file_content))

        # Extract text from paragraphs, skipping blank ones
        text_content = "\n".join(
            paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip()
        )

        # Create a narrative from the extracted text
        return self.load_txt(text_content, file_name, auto_split)

    except Exception as e:
        # Best effort: any failure is reported and yields no narratives.
        print(f"Error loading DOCX file: {e}")
        return []

def load_vtt(self, vtt_content, file_name, auto_split=False):
    """
    Load narrative data from WebVTT subtitle file

    Cue text is concatenated into a single transcript and handed to
    load_txt. The signature line (with any text after "WEBVTT"), header
    metadata, NOTE/STYLE/REGION blocks, timing lines, numeric cue
    identifiers and inline cue tags are left out.

    Parameters:
    - vtt_content: The content of the VTT file
    - file_name: The name of the file
    - auto_split: Whether to automatically split long narratives

    Returns:
    - List of narrative IDs; an empty list when no cue text is found
    """
    # A byte-order mark may precede the signature; splitlines also copes
    # with Windows line endings.
    lines = vtt_content.lstrip('\ufeff').splitlines()

    # Skip header: the signature line may carry extra text ("WEBVTT - title")
    start_index = 0
    for i, line in enumerate(lines):
        if line.strip().startswith('WEBVTT'):
            start_index = i + 1
            break

    # Header metadata (e.g. "Kind: captions") runs until the first blank
    # line; stop early at a timing line in case the blank line is missing.
    if start_index:
        while (start_index < len(lines)
               and lines[start_index].strip()
               and '-->' not in lines[start_index]):
            start_index += 1

    tag_pattern = re.compile(r'<[^>]*>')
    non_cue_blocks = ('NOTE', 'STYLE', 'REGION')

    # Extract text from cues
    fragments = []
    at_block_start = True
    skipping_block = False
    for raw_line in lines[start_index:]:
        line = raw_line.strip()

        # A blank line ends the current block
        if not line:
            at_block_start = True
            skipping_block = False
            continue

        # Comment, style and region blocks are identified by their first line
        if at_block_start:
            at_block_start = False
            skipping_block = line.split(None, 1)[0] in non_cue_blocks

        # Skip non-cue blocks, timestamp lines and numeric cue identifiers
        if skipping_block or '-->' in line or re.match(r'^\d+$', line):
            continue

        # Drop inline cue tags such as <v Speaker>, <b> or <c.class>
        text = tag_pattern.sub('', line).strip()
        if text:
            fragments.append(text)

    # Create a narrative from the transcript
    if fragments:
        return self.load_txt(" ".join(fragments), file_name, auto_split)

    return []

def _split_narrative(self, narrative_id, max_words=1000):
    """
    Split a long narrative into smaller chunks
    
    Parameters:
    - narrative_id: ID of the narrative to split
    - max_words: Maximum number of words per chunk
    
    Returns:
    - List of new narrative IDs
    """
    if narrative_id not in self.narratives:
        return []
    
    narrative = self.narratives[narrative_id]
    text = narrative['text']
    
    # Split text into sentences
    sentences = [sent.text.strip() for sent in nlp(text).sents]
    
    # Group sentences into chunks
    chunks = []
    current_chunk = []
    current_word_count = 0
    
    for sentence in sentences:
        sentence_word_count = len(sentence.split())
        
        if current_word_count + sentence_word_count > max_words and current_chunk:
            chunks.append(' '.join(current_chunk))
            current_chunk = [sentence]
            current_word_count = sentence_word_count
        else:
            current_chunk.append(sentence)
            current_word_count += sentence_word_count
    
    # Add the last chunk if not empty
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    
    # Create new narratives from chunks
    new_narrative_ids = []
    
    for i, chunk in enumerate(chunks):
        new_id = f"{narrative_id}_{i+1}"
        new_title = f"{narrative['title']} (Part {i+1})"
        
        self.narratives[new_id] = {
            "id": new_id,
            "title": new_title,
            "text": chunk,
            "processed_text": "",
            "sentences": [],
            "events": [],
            "timeline": [],
            "plotline": {},
            "storyline": {}
        }
        
        new_narrative_ids.append(new_id)
    
    # Remove the original narrative if we created chunks
    if new_narrative_ids:
        del self.narratives[narrative_id]
    
    return new_narrative_ids
# Notebook shell magic: installs python-docx, the package the optional
# `import docx` at the top of this cell looks for.
# NOTE(review): this line runs after that import attempt, so DOCX_AVAILABLE
# presumably stays False until the cell is executed again - confirm.
!pip install python-docx



Now, let's define the MultiNarrativeAnalyzer class:

In [18]:
class MultiNarrativeAnalyzer:
    def __init__(self):
        """Create an analyzer with no narratives loaded."""
        # NLTK helpers used when preprocessing text
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

        # narrative_id -> narrative record; several narratives can be held at once
        self.narratives = {}
        self.current_narrative_id = None
    
    def load_data(self, file_content, auto_split=False):
        """
        Load text data from JSON content
        
        Parameters:
        - file_content: JSON content as string
        - auto_split: If True, attempts to split a single narrative into multiple based on section headers
        
        Returns:
        - List of narrative IDs loaded
        """
        try:
            data = json.loads(file_content)
            narrative_ids = []
            
            # Case 1: JSON is an array of narratives
            if isinstance(data, list):
                for i, item in enumerate(data):
                    narrative_id = f"narrative_{i+1}"
                    if isinstance(item, dict):
                        # Extract title if available
                        title = item.get('title', narrative_id)
                        narrative_id = title.replace(' ', '_').lower()
                        
                        # Extract text
                        if 'text' in item:
                            text = item['text']
                        elif 'content' in item:
                            text = item['content']
                        else:
                            # Find first substantial string value
                            text = None
                            for key, value in item.items():
                                if isinstance(value, str) and len(value) > 100:
                                    text = value
                                    break
                        
                        if text and len(text) >= 50:
                            self.narratives[narrative_id] = {
                                'title': title,
                                'text': text,
                                'sentences': [],
                                'processed_text': "",
                                'events': [],
                                'timeline': [],
                                'plotline': {},
                                'storyline': {}
                            }
                            narrative_ids.append(narrative_id)
                    
                    elif isinstance(item, str) and len(item) >= 50:
                        self.narratives[narrative_id] = {
                            'title': narrative_id,
                            'text': item,
                            'sentences': [],
                            'processed_text': "",
                            'events': [],
                            'timeline': [],
                            'plotline': {},
                            'storyline': {}
                        }
                        narrative_ids.append(narrative_id)
            
            # Case 2: JSON is a single object with multiple narratives
            elif isinstance(data, dict):
                # Check if it's a collection of narratives
                narratives_found = False
                
                for key, value in data.items():
                    if isinstance(value, dict) and ('text' in value or 'content' in value):
                        # This looks like a narrative collection
                        narrative_id = key
                        title = value.get('title', narrative_id)
                        text = value.get('text', value.get('content', ''))
                        
                        if len(text) >= 50:
                            self.narratives[narrative_id] = {
                                'title': title,
                                'text': text,
                                'sentences': [],
                                'processed_text': "",
                                'events': [],
                                'timeline': [],
                                'plotline': {},
                                'storyline': {}
                            }
                            narrative_ids.append(narrative_id)
                            narratives_found = True
                
                # If no collection found, treat as single narrative
                if not narratives_found:
                    # Check for text field
                    if 'text' in data:
                        text = data['text']
                    elif 'content' in data:
                        text = data['content']
                    else:
                        # Find first substantial string value
                        text = None
                        for key, value in data.items():
                            if isinstance(value, str) and len(value) > 100:
                                text = value
                                break
                    
                    if text:
                        title = data.get('title', 'narrative_1')
                        narrative_id = title.replace(' ', '_').lower()
                        
                        # Check if we should auto-split the narrative
                        if auto_split and len(text) > 1000:
                            # Try to split by common section headers
                            section_pattern = r'(?:\n|^)(Chapter|Section|Part|Episode)\s+\d+[:\.\s]+'
                            sections = re.split(section_pattern, text)
                            
                            if len(sections) > 1:
                                # We found sections
                                section_titles = re.findall(section_pattern, text)
                                
                                # First section might be intro text before first header
                                if len(sections) > len(section_titles) + 1:
                                    intro = sections[0]
                                    if len(intro) >= 50:
                                        self.narratives['introduction'] = {
                                            'title': 'Introduction',
                                            'text': intro,
                                            'sentences': [],
                                            'processed_text': "",
                                            'events': [],
                                            'timeline': [],
                                            'plotline': {},
                                            'storyline': {}
                                        }
                                        narrative_ids.append('introduction')
                                    
                                    # Skip intro in further processing
                                    sections = sections[1:]
                                
                                # Process each section
                                for i, (section_title, section_text) in enumerate(zip(section_titles, sections)):
                                    if len(section_text) >= 50:
                                        section_id = f"{section_title.lower()}_{i+1}"
                                        self.narratives[section_id] = {
                                            'title': f"{section_title} {i+1}",
                                            'text': section_text,
                                            'sentences': [],
                                            'processed_text': "",
                                            'events': [],
                                            'timeline': [],
                                            'plotline': {},
                                            'storyline': {}
                                        }
                                        narrative_ids.append(section_id)
                            else:
                                # No clear sections, use as single narrative
                                self.narratives[narrative_id] = {
                                    'title': title,
                                    'text': text,
                                    'sentences': [],
                                    'processed_text': "",
                                    'events': [],
                                    'timeline': [],
                                    'plotline': {},
                                    'storyline': {}
                                }
                                narrative_ids.append(narrative_id)
                        else:
                            # Use as single narrative
                            self.narratives[narrative_id] = {
                                'title': title,
                                'text': text,
                                'sentences': [],
                                'processed_text': "",
                                'events': [],
                                'timeline': [],
                                'plotline': {},
                                'storyline': {}
                            }
                            narrative_ids.append(narrative_id)
            
            if not narrative_ids:
                raise ValueError("No valid narratives found in the JSON file")
            
            # Set the first narrative as current
            if narrative_ids:
                self.current_narrative_id = narrative_ids[0]
                
            return narrative_ids
            
        except json.JSONDecodeError as e:
            print(f"JSON parsing error: {e}")
            return []
        except ValueError as e:
            print(f"Data error: {e}")
            return []
        except Exception as e:
            print(f"Unexpected error: {e}")
            return []
    
    def set_current_narrative(self, narrative_id):
        """Set the current narrative for analysis"""
        if narrative_id in self.narratives:
            self.current_narrative_id = narrative_id
            return True
        return False
    
    def get_narrative_titles(self):
        """Get a list of all narrative titles"""
        return [(id, self.narratives[id]['title']) for id in self.narratives]
    
    def preprocess_text(self, narrative_id=None):
        """Preprocess the text for NLP analysis"""
        if narrative_id is None:
            narrative_id = self.current_narrative_id
        
        if narrative_id not in self.narratives:
            return False
        
        narrative = self.narratives[narrative_id]
        if not narrative['text']:
            return False
        
        # Tokenize into sentences
        narrative['sentences'] = sent_tokenize(narrative['text'])
        
        # Process each sentence
        processed_sentences = []
        for sentence in narrative['sentences']:
            # Tokenize words
            words = word_tokenize(sentence)
            
            # Remove stopwords and lemmatize
            filtered_words = [self.lemmatizer.lemmatize(word.lower()) 
                             for word in words 
                             if word.lower() not in self.stop_words and word.isalnum()]
            
            processed_sentences.append(' '.join(filtered_words))
        
        narrative['processed_text'] = ' '.join(processed_sentences)
        return True
    
    def extract_events(self, narrative_id=None):
        """Extract events from the text using spaCy"""
        if narrative_id is None:
            narrative_id = self.current_narrative_id
        
        if narrative_id not in self.narratives:
            return False
        
        narrative = self.narratives[narrative_id]
        if not narrative['sentences']:
            return False
        
        narrative['events'] = []
        for i, sentence in enumerate(narrative['sentences']):
            doc = nlp(sentence)
            
            # Extract events (verbs and their arguments)
            for token in doc:
                if token.pos_ == "VERB":
                    # Get subject
                    subjects = [subj.text for subj in token.head.children if subj.dep_ in ("nsubj", "nsubjpass")]
                    subject = subjects[0] if subjects else ""
                    
                    # Get object
                    objects = [obj.text for obj in token.children if obj.dep_ in ("dobj", "pobj")]
                    obj = objects[0] if objects else ""
                    
                    # Get time expressions
                    time_entities = [ent.text for ent in doc.ents if ent.label_ in ("DATE", "TIME")]
                    time = time_entities[0] if time_entities else ""
                    
                    # Create event
                    event = {
                        "sentence_id": i,
                        "sentence": sentence,
                        "verb": token.text,
                        "subject": subject,
                        "object": obj,
                        "time": time
                    }
                    narrative['events'].append(event)
        
        return len(narrative['events']) > 0
    
    def analyze_timeline(self, narrative_id=None):
        """Analyze the timeline of events"""
        if narrative_id is None:
            narrative_id = self.current_narrative_id
        
        if narrative_id not in self.narratives:
            return False
        
        narrative = self.narratives[narrative_id]
        if not narrative['events']:
            return False
        
        # Sort events by sentence_id to maintain chronological order
        sorted_events = sorted(narrative['events'], key=lambda x: x["sentence_id"])
        
        # Create timeline
        narrative['timeline'] = []
        for event in sorted_events:
            timeline_event = {
                "event": f"{event['subject']} {event['verb']} {event['object']}".strip(),
                "time": event["time"] if event["time"] else "Unspecified",
                "sentence": event["sentence"]
            }
            narrative['timeline'].append(timeline_event)
        
        return len(narrative['timeline']) > 0
    
    def analyze_plotline(self, narrative_id=None):
        """Analyze the plotline using Vossen's framework"""
        if narrative_id is None:
            narrative_id = self.current_narrative_id
        
        if narrative_id not in self.narratives:
            return False
        
        narrative = self.narratives[narrative_id]
        if not narrative['events']:
            return False
        
        # Initialize plotline components
        narrative['plotline'] = {
            "exposition": [],
            "rising_action": [],
            "climax": [],
            "falling_action": [],
            "resolution": []
        }
        
        # Simple heuristic: divide events into 5 parts
        total_events = len(narrative['events'])
        section_size = max(1, total_events // 5)
        
        # Assign events to plotline components
        for i, event in enumerate(narrative['events']):
            event_summary = f"{event['subject']} {event['verb']} {event['object']}".strip()
            
            if i < section_size:
                narrative['plotline']["exposition"].append(event_summary)
            elif i < section_size * 2:
                narrative['plotline']["rising_action"].append(event_summary)
            elif i < section_size * 3:
                narrative['plotline']["climax"].append(event_summary)
            elif i < section_size * 4:
                narrative['plotline']["falling_action"].append(event_summary)
            else:
                narrative['plotline']["resolution"].append(event_summary)
        
        return True
    
def analyze_storyline(self, narrative_id=None):
    """
    Derive storyline components from a narrative's events.

    Fills narrative['storyline'] with: characters (event subjects/objects
    mentioned more than once), settings (distinct time expressions),
    conflicts (events whose verb contains a conflict word), themes (the five
    most frequent nouns of the processed text) and narrative_arcs (the keys
    of the plotline). Uses the current narrative when narrative_id is None.
    Returns False for an unknown narrative or one without events.

    NOTE(review): this function takes `self` but is defined at module level
    rather than inside MultiNarrativeAnalyzer - presumably an indentation
    slip; confirm how it is attached to the class.
    """
    target_id = self.current_narrative_id if narrative_id is None else narrative_id
    if target_id not in self.narratives:
        return False

    narrative = self.narratives[target_id]
    events = narrative['events']
    if not events:
        return False

    # Storyline components based on Caselli and Segers' framework
    storyline = {
        "characters": {},
        "settings": [],
        "conflicts": [],
        "themes": [],
        "narrative_arcs": []
    }
    narrative['storyline'] = storyline

    # Characters: anything acting as subject or object (longer than 1 char)
    cast = {}
    for event in events:
        actor = event["subject"]
        if actor and len(actor) > 1:
            entry = cast.setdefault(actor, {"actions": [], "mentions": 0})
            entry["mentions"] += 1
            entry["actions"].append(event["verb"])

        target = event["object"]
        if target and len(target) > 1:
            cast.setdefault(target, {"actions": [], "mentions": 0})["mentions"] += 1

    # Only characters mentioned more than once are kept
    storyline["characters"] = {name: info for name, info in cast.items() if info["mentions"] > 1}

    # Settings: the distinct time expressions
    storyline["settings"] = list({event["time"] for event in events if event["time"]})

    # Conflicts: verbs containing one of a few conflict words
    conflict_verbs = ["fight", "argue", "disagree", "oppose", "conflict", "battle", "struggle"]
    storyline["conflicts"] = [
        f"{event['subject']} {event['verb']} {event['object']}"
        for event in events
        for conflict_verb in conflict_verbs
        if conflict_verb in event["verb"].lower()
    ]

    # Themes: most frequent nouns in the preprocessed text
    noun_freq = {}
    for token in nlp(narrative['processed_text']):
        if token.pos_ not in ["NOUN", "PROPN"]:
            continue
        word = token.text.lower()
        if word in self.stop_words:
            continue
        noun_freq[word] = noun_freq.get(word, 0) + 1

    ranked = sorted(noun_freq.items(), key=lambda pair: pair[1], reverse=True)
    storyline["themes"] = [noun for noun, _ in ranked[:5]]

    # Narrative arcs are simply the plotline stages present
    storyline["narrative_arcs"] = list(narrative['plotline'].keys())

    return True


Let's continue with the rest of the methods for the MultiNarrativeAnalyzer class:

In [19]:
def extract_events(self, narrative_id=None):
    """
    Extract events from the text using spaCy

    Every token tagged VERB yields one event holding the verb, a subject,
    an object and the first DATE/TIME entity of its sentence (empty strings
    when absent). Requires 'sentences' to be filled already. Uses the
    current narrative when narrative_id is None.

    Returns True when at least one event was found, False otherwise
    (including unknown narrative or no sentences).
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id

    if narrative_id not in self.narratives:
        return False

    narrative = self.narratives[narrative_id]
    if not narrative['sentences']:
        return False

    subject_deps = ("nsubj", "nsubjpass")
    events = narrative['events'] = []
    for i, sentence in enumerate(narrative['sentences']):
        doc = nlp(sentence)

        # The time expression belongs to the sentence, so look it up once
        # instead of once per verb.
        time = next((ent.text for ent in doc.ents if ent.label_ in ("DATE", "TIME")), "")

        # Extract events (verbs and their arguments)
        for token in doc:
            if token.pos_ != "VERB":
                continue

            # Prefer the verb's own subject; fall back to the subject of its
            # head, which covers verbs that share a subject
            # ("He huffed and puffed").
            subjects = [child.text for child in token.children if child.dep_ in subject_deps]
            if not subjects:
                subjects = [child.text for child in token.head.children
                            if child.dep_ in subject_deps]
            subject = subjects[0] if subjects else ""

            # Direct object first; otherwise the object of a preposition
            # attached to the verb ("ran to the house" -> "house").
            objects = [child.text for child in token.children
                       if child.dep_ in ("dobj", "pobj")]
            if not objects:
                objects = [grandchild.text
                           for child in token.children if child.dep_ == "prep"
                           for grandchild in child.children if grandchild.dep_ == "pobj"]
            obj = objects[0] if objects else ""

            events.append({
                "sentence_id": i,
                "sentence": sentence,
                "verb": token.text,
                "subject": subject,
                "object": obj,
                "time": time
            })

    return len(events) > 0

def analyze_timeline(self, narrative_id=None):
    """
    Build the narrative's timeline from its extracted events.

    Events are ordered by sentence position; each timeline entry holds a
    "subject verb object" summary, the time expression (or "Unspecified")
    and the source sentence. Uses the current narrative when narrative_id
    is None. Returns False for an unknown narrative or one without events.
    """
    target_id = self.current_narrative_id if narrative_id is None else narrative_id
    if target_id not in self.narratives:
        return False

    narrative = self.narratives[target_id]
    if not narrative['events']:
        return False

    # Sentence order stands in for chronological order
    ordered = sorted(narrative['events'], key=lambda item: item["sentence_id"])

    narrative['timeline'] = [
        {
            "event": f"{item['subject']} {item['verb']} {item['object']}".strip(),
            "time": item["time"] or "Unspecified",
            "sentence": item["sentence"]
        }
        for item in ordered
    ]

    return bool(narrative['timeline'])

def analyze_plotline(self, narrative_id=None):
    """Split a narrative's events into the five plotline stages.

    Parameters:
    - narrative_id: key into ``self.narratives``; when None,
      ``self.current_narrative_id`` is used.

    The events, in their stored order, are cut into five consecutive runs
    (exposition, rising action, climax, falling action, resolution).  Each
    event is stored as a "subject verb object" summary string and the result
    replaces ``narrative['plotline']``.

    The split is balanced: every stage receives ``len(events) // 5`` events
    and the leftover events are handed out one each to the earliest stages.
    This matches the previous behaviour for fewer than five events and for
    multiples of five, but no longer piles the whole remainder into
    "resolution" (9 events used to split 1/1/1/1/5, now 2/2/2/2/1).

    Returns:
    - True when the plotline was built, False when the narrative is unknown
      or has no events.
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id

    if narrative_id not in self.narratives:
        return False

    narrative = self.narratives[narrative_id]
    events = narrative['events']
    if not events:
        return False

    stages = ("exposition", "rising_action", "climax", "falling_action", "resolution")
    base_size, remainder = divmod(len(events), len(stages))

    plotline = {}
    start = 0
    for position, stage in enumerate(stages):
        # The first `remainder` stages absorb one extra event each
        stop = start + base_size + (1 if position < remainder else 0)
        plotline[stage] = [
            f"{event['subject']} {event['verb']} {event['object']}".strip()
            for event in events[start:stop]
        ]
        start = stop

    narrative['plotline'] = plotline
    return True

def analyze_storyline(self, narrative_id=None):
    """Derive characters, settings, conflicts, themes and arcs for a narrative.

    Parameters:
    - narrative_id: key into ``self.narratives``; when None,
      ``self.current_narrative_id`` is used.

    The result is stored in ``narrative['storyline']`` with these keys:
    - "characters": subjects/objects longer than one character that are
      mentioned more than once, each with its mention count and the verbs it
      was the subject of.
    - "settings": the distinct non-empty time expressions of the events, in
      order of first appearance.
    - "conflicts": "subject verb object" strings of events whose verb
      contains one of the conflict keywords.
    - "themes": the five most frequent NOUN/PROPN tokens of
      ``narrative['processed_text']`` that are not in ``self.stop_words``.
    - "narrative_arcs": the component names of ``narrative['plotline']``;
      empty when no plotline has been produced yet.

    Returns:
    - True when the storyline was built, False when the narrative is unknown
      or has no events.
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id

    if narrative_id not in self.narratives:
        return False

    narrative = self.narratives[narrative_id]
    events = narrative['events']
    if not events:
        return False

    # Extract characters (subjects and objects); only subjects record actions
    characters = {}
    for event in events:
        subject, obj = event["subject"], event["object"]
        if subject and len(subject) > 1:
            entry = characters.setdefault(subject, {"actions": [], "mentions": 0})
            entry["mentions"] += 1
            entry["actions"].append(event["verb"])
        if obj and len(obj) > 1:
            entry = characters.setdefault(obj, {"actions": [], "mentions": 0})
            entry["mentions"] += 1

    # Keep only significant characters (mentioned more than once)
    significant = {name: info for name, info in characters.items() if info["mentions"] > 1}

    # Settings are the distinct time expressions; dict.fromkeys de-duplicates
    # while keeping first-appearance order so repeated runs agree
    settings = list(dict.fromkeys(event["time"] for event in events if event["time"]))

    # Simple conflict detection: any() records an event once even if its verb
    # happens to contain more than one keyword
    conflict_verbs = ("fight", "argue", "disagree", "oppose", "conflict", "battle", "struggle")
    conflicts = [
        f"{event['subject']} {event['verb']} {event['object']}"
        for event in events
        if any(keyword in event["verb"].lower() for keyword in conflict_verbs)
    ]

    # Simple theme extraction: count nouns and proper nouns (not adjectives)
    doc = nlp(narrative['processed_text'])
    noun_freq = {}
    for token in doc:
        word = token.text.lower()
        if token.pos_ in ("NOUN", "PROPN") and word not in self.stop_words:
            noun_freq[word] = noun_freq.get(word, 0) + 1

    # sorted() is stable, so equally frequent nouns keep first-seen order
    ranked = sorted(noun_freq.items(), key=lambda item: item[1], reverse=True)
    themes = [noun for noun, _ in ranked[:5]]

    # The plotline may not have been analysed yet (missing or empty value);
    # fall back to no arcs instead of failing on .keys()
    plotline = narrative.get('plotline') or {}

    narrative['storyline'] = {
        "characters": significant,
        "settings": settings,
        "conflicts": conflicts,
        "themes": themes,
        "narrative_arcs": list(plotline),
    }

    return True


Let's add visualization methods to the MultiNarrativeAnalyzer class:

In [6]:
def visualize_timeline(self, narrative_id=None):
    """Render a narrative's timeline as a PNG image.

    Parameters:
    - narrative_id: key into ``self.narratives``; when None,
      ``self.current_narrative_id`` is used.

    Each timeline entry becomes one marker on a vertical line, annotated
    with "event (time)".

    Returns:
    - An ``io.BytesIO`` positioned at the start of the PNG data, or None when
      the narrative is unknown or its timeline is empty.
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id
    if narrative_id not in self.narratives:
        return None

    narrative = self.narratives[narrative_id]
    timeline = narrative['timeline']
    if not timeline:
        return None

    captions = [f"{entry['event']} ({entry['time']})" for entry in timeline]
    rows = list(range(len(captions)))

    fig, ax = plt.subplots(figsize=(12, 6))

    # One marker per event, stacked along x = 0
    ax.plot([0] * len(rows), rows, 'o', markersize=10, color='skyblue')

    # Captions sit at 10% of the axes width, level with their marker
    for row, caption in zip(rows, captions):
        ax.annotate(caption,
                    xy=(0.1, row),
                    xycoords=('axes fraction', 'data'),
                    va='center',
                    fontsize=9)

    ax.set_title(f'Timeline of Events: {narrative["title"]}')
    ax.set_yticks([])
    ax.set_xticks([])
    ax.axis('off')

    # Hand the image back in memory rather than on disk
    png = io.BytesIO()
    fig.savefig(png, format='png')
    png.seek(0)
    plt.close(fig)

    return png

def visualize_plotline(self, narrative_id=None):
    """Render the plotline structure of a narrative as a PNG image.

    Parameters:
    - narrative_id: key into ``self.narratives``; when None,
      ``self.current_narrative_id`` is used.

    The plotline components are spread evenly along the x axis; the height
    of each point is its event count relative to the largest component.

    Returns:
    - An ``io.BytesIO`` positioned at the start of the PNG data, or None when
      the narrative is unknown or has no plotline.
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id

    if narrative_id not in self.narratives:
        return None

    narrative = self.narratives[narrative_id]
    plotline = narrative['plotline']
    if not plotline:
        return None

    # Get plotline components
    components = list(plotline.keys())
    event_counts = [len(plotline[component]) for component in components]

    # Create x-positions (0 to 1)
    x_positions = np.linspace(0, 1, len(components))

    # Normalise heights; "or 1" keeps the division defined when every
    # component is empty (all points are then drawn at height 0)
    max_count = max(event_counts, default=0) or 1
    y_positions = [count / max_count for count in event_counts]

    # Create the figure only once the data is known to be plottable
    plt.figure(figsize=(12, 6))
    plt.plot(x_positions, y_positions, 'o-', linewidth=2, markersize=10, color='skyblue')

    # Add component labels just above each point
    for x, y, component, count in zip(x_positions, y_positions, components, event_counts):
        plt.annotate(f"{component.replace('_', ' ').title()}\n({count} events)",
                     xy=(x, y),
                     xytext=(0, 10),
                     textcoords='offset points',
                     ha='center',
                     va='bottom',
                     fontsize=9)

    # Set the title and labels
    plt.title(f'Plotline Structure: {narrative["title"]}')
    plt.ylim(0, 1.2)  # headroom so the label of the tallest point fits
    plt.axis('off')

    # Save the figure to a bytes buffer
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    plt.close()

    return buf

def visualize_storyline(self, narrative_id=None):
    """Render the character interaction network of a narrative as a PNG image.

    Parameters:
    - narrative_id: key into ``self.narratives``; when None,
      ``self.current_narrative_id`` is used.

    Every significant character becomes a node sized by its mention count.
    Two characters are linked when one is the subject and the other the
    object of the same event; the edge width is the number of such events.
    The graph is undirected, so A->B and B->A interactions add up on one edge
    (previously the later direction overwrote the earlier count).

    Returns:
    - An ``io.BytesIO`` positioned at the start of the PNG data, or None when
      the narrative is unknown or has no storyline characters.
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id

    if narrative_id not in self.narratives:
        return None

    narrative = self.narratives[narrative_id]
    if not narrative['storyline'] or not narrative['storyline']["characters"]:
        return None

    characters = narrative['storyline']["characters"]

    # Create a character network
    G = nx.Graph()

    # Add character nodes
    for name, info in characters.items():
        G.add_node(name, size=info["mentions"] * 100)

    # Add/strengthen an edge for every event that links two known characters.
    # Accumulating on the graph itself treats (A, B) and (B, A) as one edge.
    for event in narrative['events']:
        subject, obj = event["subject"], event["object"]
        if subject in characters and obj in characters:
            if G.has_edge(subject, obj):
                G[subject][obj]['weight'] += 1
            else:
                G.add_edge(subject, obj, weight=1)

    # Create a figure
    plt.figure(figsize=(12, 8))

    # Per-node and per-edge drawing attributes, in graph iteration order
    node_sizes = [G.nodes[node]['size'] for node in G.nodes]
    edge_widths = [G.edges[edge]['weight'] for edge in G.edges]

    # Draw the network; the fixed seed keeps the layout reproducible
    pos = nx.spring_layout(G, seed=42)
    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue')
    nx.draw_networkx_edges(G, pos, width=edge_widths, alpha=0.7)
    nx.draw_networkx_labels(G, pos, font_size=10)

    plt.title(f'Character Relationship Network: {narrative["title"]}')
    plt.axis('off')
    plt.tight_layout()

    # Save the figure to a bytes buffer
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    plt.close()

    return buf

def compare_narratives(self, narrative_ids, comparison_type='characters'):
    """
    Compare several narratives along one dimension.

    Parameters:
    - narrative_ids: List of narrative IDs to compare; IDs that are not in
      ``self.narratives`` are dropped before comparing
    - comparison_type: Type of comparison ('characters', 'events', 'themes')

    Returns:
    - Whatever the matching ``_compare_*`` method returns (a visualization
      buffer), or None when fewer than two known narratives remain or the
      comparison type is not recognised
    """
    if not narrative_ids or len(narrative_ids) < 2:
        return None

    # Only narratives that have actually been loaded can be compared
    known_ids = [candidate for candidate in narrative_ids if candidate in self.narratives]
    if len(known_ids) < 2:
        return None

    # Comparison type -> name of the method that draws it
    handler_names = {
        'characters': '_compare_characters',
        'events': '_compare_events',
        'themes': '_compare_themes',
    }
    handler_name = handler_names.get(comparison_type)
    if handler_name is None:
        return None

    return getattr(self, handler_name)(known_ids)

def _compare_characters(self, narrative_ids):
    """Compare characters across narratives"""
    # Create a figure
    plt.figure(figsize=(12, 8))
    
    # Get character data
    character_data = {}
    all_characters = set()
    
    for narrative_id in narrative_ids:
        narrative = self.narratives[narrative_id]
        if 'storyline' in narrative and 'characters' in narrative['storyline']:
            characters = narrative['storyline']['characters']
            character_data[narrative_id] = characters
            all_characters.update(characters.keys())
    
    if not character_data:
        return None
    
    # Prepare data for visualization
    character_list = list(all_characters)
    narrative_titles = [self.narratives[id]['title'] for id in narrative_ids]
    
    # Create matrix for heatmap
    matrix = np.zeros((len(narrative_ids), len(character_list)))
    
    for i, narrative_id in enumerate(narrative_ids):
        if narrative_id in character_data:
            for j, character in enumerate(character_list):
                if character in character_data[narrative_id]:
                    matrix[i, j] = character_data[narrative_id][character]['mentions']
    
    # Create heatmap
    sns.heatmap(matrix, annot=True, fmt='g', cmap='Blues',
               xticklabels=character_list, yticklabels=narrative_titles)
    
    plt.title('Character Comparison Across Narratives')
    plt.xlabel('Characters')
    plt.ylabel('Narratives')
    plt.tight_layout()
    
    # Save the figure to a bytes buffer
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    plt.close()
    
    return buf

def _compare_events(self, narrative_ids):
    """Compare event counts across narratives"""
    # Create a figure
    plt.figure(figsize=(12, 8))
    
    # Get event data
    event_counts = []
    narrative_titles = []
    
    for narrative_id in narrative_ids:
        narrative = self.narratives[narrative_id]
        if 'events' in narrative:
            event_counts.append(len(narrative['events']))
            narrative_titles.append(narrative['title'])
    
    if not event_counts:
        return None
    
    # Create bar chart
    plt.bar(narrative_titles, event_counts, color='skyblue')
    
    plt.title('Event Count Comparison Across Narratives')
    plt.xlabel('Narratives')
    plt.ylabel('Number of Events')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    
    # Save the figure to a bytes buffer
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    plt.close()
    
    return buf

def _compare_themes(self, narrative_ids):
    """Compare themes across narratives"""
    # Create a figure
    plt.figure(figsize=(12, 8))
    
    # Get theme data
    theme_data = {}
    all_themes = set()
    
    for narrative_id in narrative_ids:
        narrative = self.narratives[narrative_id]
        if 'storyline' in narrative and 'themes' in narrative['storyline']:
            themes = narrative['storyline']['themes']
            theme_data[narrative_id] = themes
            all_themes.update(themes)
    
    if not theme_data:
        return None
    
    # Prepare data for visualization
    theme_list = list(all_themes)
    narrative_titles = [self.narratives[id]['title'] for id in narrative_ids]
    
    # Create matrix for heatmap
    matrix = np.zeros((len(narrative_ids), len(theme_list)))
    
    for i, narrative_id in enumerate(narrative_ids):
        if narrative_id in theme_data:
            for j, theme in enumerate(theme_list):
                if theme in theme_data[narrative_id]:
                    matrix[i, j] = 1  # Binary presence
    
    # Create heatmap
    sns.heatmap(matrix, annot=True, fmt='g', cmap='Blues',
               xticklabels=theme_list, yticklabels=narrative_titles)
    
    plt.title('Theme Comparison Across Narratives')
    plt.xlabel('Themes')
    plt.ylabel('Narratives')
    plt.tight_layout()
    
    # Save the figure to a bytes buffer
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    plt.close()
    
    return buf

def generate_report(self, narrative_id=None):
    """Generate a PDF report of the analysis.

    Parameters:
    - narrative_id: key into ``self.narratives``; when None,
      ``self.current_narrative_id`` is used.

    The report contains a title, the first ten timeline events followed by
    the timeline figure, then up to three events per plotline component
    followed by the plotline figure.  Figures are passed to the PDF through
    temporary PNG files, which are always removed again (the plotline file
    used to be left behind on disk).

    Returns:
    - None when the narrative is unknown.

    NOTE(review): the definition ends after the plotline figure without
    writing or returning the PDF object - confirm whether the remainder of
    the function is missing from this cell.
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id

    if narrative_id not in self.narratives:
        return None

    narrative = self.narratives[narrative_id]

    def add_figure_page(pdf, figure_buf):
        """Place the PNG held in ``figure_buf`` on a new page of ``pdf``."""
        # pdf.image() is handed a file path here, so the buffer is written to
        # a temporary PNG.  Only the name is reserved inside the ``with``; the
        # image is saved once the handle is closed so the file is not opened
        # twice at the same time.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
            tmp_name = tmp.name
        try:
            Image.open(figure_buf).save(tmp_name)
            pdf.add_page()
            pdf.image(tmp_name, x=10, y=30, w=180)
            pdf.ln(120)  # Space for the image
        finally:
            # Clean up even when saving or embedding the image fails
            os.unlink(tmp_name)

    # Create a PDF
    pdf = FPDF()
    pdf.add_page()

    # Report title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, f"Narrative Analysis Report: {narrative['title']}", ln=True, align="C")
    pdf.ln(10)

    # Add timeline analysis
    pdf.set_font("Arial", "B", 14)
    pdf.cell(0, 10, "1. Timeline Event Analysis", ln=True)
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, "The timeline analysis identifies key events in chronological order, showing how the narrative unfolds over time.")

    # Add timeline events
    pdf.set_font("Arial", "I", 12)
    for i, event in enumerate(narrative['timeline'][:10]):  # Limit to first 10 events
        pdf.multi_cell(0, 10, f"{i+1}. {event['event']} ({event['time']})")

    # Add timeline visualization
    timeline_img = self.visualize_timeline(narrative_id)
    if timeline_img:
        add_figure_page(pdf, timeline_img)

    # Add plotline analysis
    pdf.add_page()
    pdf.set_font("Arial", "B", 14)
    pdf.cell(0, 10, "2. Plotline Analysis", ln=True)
    pdf.set_font("Arial", "", 12)
    pdf.multi_cell(0, 10, "The plotline analysis breaks down the narrative into the traditional five-act structure based on Vossen's framework.")

    # Add plotline components
    for component, events in narrative['plotline'].items():
        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, f"{component.replace('_', ' ').title()}:", ln=True)
        pdf.set_font("Arial", "", 12)
        for event in events[:3]:  # Limit to first 3 events per component
            pdf.multi_cell(0, 10, f"- {event}")

    # Add plotline visualization
    plotline_img = self.visualize_plotline(narrative_id)
    if plotline_img:
        add_figure_page(pdf, plotline_img)


Extracting Objective Problem Formulations from Narratives

Extracting objective problem formulations from narratives is a fascinating challenge that combines narrative analysis with problem-solving frameworks. Here's a comprehensive approach to implement this functionality in your MultiNarrativeAnalyzer:

Conceptual Framework

To extract objective problem formulations, we need to:

- Identify problems/conflicts within narratives
- Structure these problems in an objective, formal way
- Extract key parameters that define the problem space
- Formulate solution constraints based on narrative context


Implementation Approach

In [20]:
def extract_problem_formulation(self, narrative_id=None):
    """
    Derive a structured problem formulation from an analysed narrative.

    Uses the narrative's events, storyline and plotline to collect the core
    problems, the stakeholders involved, constraints, objectives and decision
    variables, and summarises them in a solution-space description.  The
    result is also stored under narrative['problem_formulation'].

    Parameters:
    - narrative_id: key into self.narratives; when None,
      self.current_narrative_id is used.

    Returns the problem formulation dict, or None when the narrative is
    unknown or has no events or no storyline.
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id

    if narrative_id not in self.narratives:
        return None

    narrative = self.narratives[narrative_id]
    if not (narrative['events'] and narrative['storyline']):
        return None

    events = narrative['events']
    storyline = narrative['storyline']

    # The result dict shares these containers, so filling them fills the result
    core_problems = []
    stakeholders = {}
    constraints = []
    objectives = []
    decision_variables = []
    problem_formulation = {
        "core_problems": core_problems,
        "stakeholders": stakeholders,
        "constraints": constraints,
        "objectives": objectives,
        "decision_variables": decision_variables,
        "solution_space": {}
    }

    # 1. Core problems: events whose summary mentions a conflict indicator
    conflict_indicators = (
        "problem", "conflict", "challenge", "issue", "difficulty", "obstacle",
        "dilemma", "crisis", "trouble", "struggle", "dispute", "disagreement"
    )
    for event in events:
        summary = f"{event['subject']} {event['verb']} {event['object']}".lower()
        if not any(indicator in summary for indicator in conflict_indicators):
            continue
        core_problems.append({
            "description": event['sentence'],
            "actors": [event['subject']] if event['subject'] else [],
            "type": "explicit",
            "severity": self._estimate_problem_severity(event['sentence'])
        })

    # ... plus storyline conflicts not already contained in a known problem
    for conflict in storyline['conflicts'] or ():
        if any(conflict in known['description'] for known in core_problems):
            continue
        core_problems.append({
            "description": conflict,
            "actors": self._extract_entities_from_text(conflict),
            "type": "narrative_conflict",
            "severity": self._estimate_problem_severity(conflict)
        })

    # 2. Stakeholders: characters named in a problem or mentioned more than twice
    for character, data in storyline['characters'].items():
        involvement_score = sum(
            1 for known in core_problems
            if character.lower() in known['description'].lower()
        )
        if involvement_score <= 0 and data['mentions'] <= 2:
            continue
        stakeholders[character] = {
            "involvement_score": involvement_score,
            "importance": data['mentions'],
            "actions": data['actions'][:5] if 'actions' in data else [],
            "interests": self._infer_character_interests(character, events)
        }

    # 3. Constraints: one environmental constraint per setting ...
    constraints.extend(
        {
            "type": "environmental",
            "description": f"Action takes place in/during {setting}",
            "formalization": f"context = {setting}"
        }
        for setting in storyline['settings']
    )

    # ... and a social constraint for complete events whose verb contains an
    # obligation, prohibition or permission marker
    modal_markers = ("must", "should", "cannot", "forbidden", "allow", "permit")
    for event in events:
        if not (event['subject'] and event['object'] and event['verb']):
            continue
        verb = event['verb'].lower()
        if any(marker in verb for marker in modal_markers):
            constraints.append({
                "type": "social",
                "description": event['sentence'],
                "formalization": f"{event['subject']} {event['verb']} {event['object']}"
            })

    # 4. Objectives: stakeholders that appear in the plotline's resolution ...
    plotline = narrative['plotline']
    if "resolution" in plotline and plotline["resolution"]:
        for event_text in plotline["resolution"]:
            for actor in self._extract_entities_from_text(event_text):
                if actor in stakeholders:
                    objectives.append({
                        "stakeholder": actor,
                        "description": event_text,
                        "type": "resolution_goal"
                    })

    # ... and the (up to two) most repeated actions of each stakeholder
    for character, data in stakeholders.items():
        if not data.get('actions'):
            continue
        action_counts = {}
        for action in data['actions']:
            action_counts[action] = action_counts.get(action, 0) + 1

        ranked_actions = sorted(action_counts.items(), key=lambda item: item[1], reverse=True)
        for action, count in ranked_actions[:2]:
            if count > 1:  # a single occurrence is not taken as a goal
                objectives.append({
                    "stakeholder": character,
                    "description": f"{character} wants to {action}",
                    "type": "inferred_from_actions",
                    "confidence": min(count / 5, 1.0)  # capped at 1.0
                })

    # 5. Decision variables: the distinct actions of important stakeholders
    for character, data in stakeholders.items():
        if 'actions' not in data or data['importance'] <= 2:
            continue
        decision_variables.append({
            "stakeholder": character,
            "variable_name": f"action_{character.replace(' ', '_')}",
            "domain": list(set(data['actions']))[:5],
            "description": f"Actions available to {character}"
        })

    # 6. Solution space summary; the complexity helper is called while
    #    "solution_space" is still the empty placeholder
    problem_formulation["solution_space"] = {
        "dimensions": len(decision_variables),
        "primary_objectives": [
            objective for objective in objectives
            if objective.get("confidence", 0) > 0.5 or objective["type"] == "resolution_goal"
        ],
        "constraint_count": len(constraints),
        "stakeholder_count": len(stakeholders),
        "problem_complexity": self._calculate_problem_complexity(problem_formulation)
    }

    # Store the problem formulation in the narrative data
    narrative['problem_formulation'] = problem_formulation

    return problem_formulation

def _extract_entities_from_text(self, text):
    """Extract the distinct entities mentioned in ``text``.

    Runs ``text`` through the file's ``nlp`` pipeline and collects the
    entities labelled PERSON, ORG or GPE.  When there are none, the NOUN and
    PROPN tokens that are not in ``self.stop_words`` are used instead.

    Returns:
    - A list without duplicates, in order of first appearance.  (The order
      used to come from a set and could differ between runs, which made the
      order of derived actors and objectives unstable.)
    """
    doc = nlp(text)
    entities = [ent.text for ent in doc.ents if ent.label_ in ("PERSON", "ORG", "GPE")]

    # If no entities found, try extracting nouns
    if not entities:
        entities = [token.text for token in doc
                    if token.pos_ in ("NOUN", "PROPN")
                    and token.text.lower() not in self.stop_words]

    # dict.fromkeys drops duplicates while keeping insertion order
    return list(dict.fromkeys(entities))

def _estimate_problem_severity(self, text):
    """Estimate the severity of a problem based on language used"""
    severity_indicators = {
        "critical": ["crisis", "emergency", "disaster", "catastrophe", "life-threatening", "urgent"],
        "high": ["serious", "severe", "major", "significant", "dangerous", "harmful"],
        "medium": ["problem", "issue", "challenge", "difficulty", "trouble", "concern"],
        "low": ["minor", "small", "slight", "trivial", "inconvenience", "nuisance"]
    }
    
    text_lower = text.lower()
    
    # Check for severity indicators
    for severity, indicators in severity_indicators.items():
        for indicator in indicators:
            if indicator in text_lower:
                return severity
    
    # Default severity
    return "medium"

def _infer_character_interests(self, character, events):
    """Infer a character's interests based on their actions"""
    interests = []
    
    # Find events where character is the subject
    character_events = [event for event in events 
                       if character.lower() in event['subject'].lower()]
    
    # Extract objects of character's actions
    objects = [event['object'] for event in character_events 
              if event['object'] and len(event['object']) > 1]
    
    # Count frequency of objects
    object_counts = {}
    for obj in objects:
        if obj.lower() not in object_counts:
            object_counts[obj.lower()] = 0
        object_counts[obj.lower()] += 1
    
    # Get top interests
    top_interests = sorted(object_counts.items(), key=lambda x: x[1], reverse=True)[:3]
    interests = [f"interest in {obj}" for obj, count in top_interests if count > 1]
    
    return interests

def _calculate_problem_complexity(self, problem_formulation):
    """Calculate the complexity of the problem space"""
    # Factors affecting complexity:
    # 1. Number of stakeholders
    # 2. Number of constraints
    # 3. Number of decision variables
    # 4. Number of conflicting objectives
    
    stakeholder_count = len(problem_formulation["stakeholders"])
    constraint_count = len(problem_formulation["constraints"])
    variable_count = len(problem_formulation["decision_variables"])
    objective_count = len(problem_formulation["objectives"])
    
    # Simple complexity score
    complexity_score = (stakeholder_count * 0.3 + 
                        constraint_count * 0.2 + 
                        variable_count * 0.3 + 
                        objective_count * 0.2)
    
    # Categorize complexity
    if complexity_score < 2:
        return "low"
    elif complexity_score < 5:
        return "medium"
    elif complexity_score < 10:
        return "high"
    else:
        return "very high"

def visualize_problem_formulation(self, narrative_id=None):
    """Draw a narrative's stored problem formulation as a network graph.

    Parameters:
    - narrative_id: key into ``self.narratives``; when None,
      ``self.current_narrative_id`` is used.

    Problems, stakeholders, constraints and objectives become colour-coded
    nodes.  Objectives are linked to their stakeholder and problems to those
    of their actors that are nodes of the graph.

    Returns:
    - An ``io.BytesIO`` positioned at the start of the PNG data, or None when
      the narrative is unknown or has no 'problem_formulation' entry.
    """
    if narrative_id is None:
        narrative_id = self.current_narrative_id

    if narrative_id not in self.narratives:
        return None

    narrative = self.narratives[narrative_id]
    if 'problem_formulation' not in narrative:
        return None

    formulation = narrative['problem_formulation']
    problems = formulation["core_problems"]

    graph = nx.Graph()

    # Nodes are added in a fixed order: problems, stakeholders, constraints,
    # objectives
    for index, problem in enumerate(problems):
        graph.add_node(f"problem_{index}",
                       label=f"Problem: {problem['description'][:30]}...",
                       type="problem",
                       size=300)

    for stakeholder in formulation["stakeholders"]:
        graph.add_node(stakeholder,
                       label=f"Stakeholder: {stakeholder}",
                       type="stakeholder",
                       size=200)

    for index, constraint in enumerate(formulation["constraints"]):
        graph.add_node(f"constraint_{index}",
                       label=f"Constraint: {constraint['description'][:30]}...",
                       type="constraint",
                       size=150)

    for index, objective in enumerate(formulation["objectives"]):
        objective_node = f"objective_{index}"
        graph.add_node(objective_node,
                       label=f"Objective: {objective['description'][:30]}...",
                       type="objective",
                       size=200)

        # Link the objective to its stakeholder when that node exists
        if "stakeholder" in objective and objective["stakeholder"] in graph:
            graph.add_edge(objective_node, objective["stakeholder"], type="has_objective")

    # Link each problem to the actors that are nodes of the graph
    for index, problem in enumerate(problems):
        for actor in problem["actors"]:
            if actor in graph:
                graph.add_edge(f"problem_{index}", actor, type="affects")

    plt.figure(figsize=(14, 10))

    # Colour look-ups by node / edge type; anything unknown is drawn gray
    node_palette = {
        'problem': 'lightcoral',
        'stakeholder': 'lightblue',
        'constraint': 'lightgreen',
        'objective': 'gold',
    }
    edge_palette = {'affects': 'red', 'has_objective': 'gold'}

    node_colors = [
        node_palette.get(graph.nodes[node].get('type', ''), 'gray')
        for node in graph.nodes()
    ]
    node_sizes = [graph.nodes[node].get('size', 100) for node in graph.nodes()]
    edge_colors = [
        edge_palette.get(graph.edges[u, v].get('type', ''), 'gray')
        for u, v in graph.edges()
    ]

    # The fixed seed keeps the layout reproducible
    pos = nx.spring_layout(graph, seed=42)

    nx.draw_networkx_nodes(graph, pos, node_size=node_sizes, node_color=node_colors, alpha=0.8)
    nx.draw_networkx_edges(graph, pos, width=1.5, edge_color=edge_colors, alpha=0.7)

    # Show the descriptive label of each node (falling back to its id)
    labels = {node: graph.nodes[node].get('label', node) for node in graph.nodes()}
    nx.draw_networkx_labels(graph, pos, labels=labels, font_size=8, font_family='sans-serif')

    plt.title(f'Problem Formulation: {narrative["title"]}')
    plt.axis('off')
    plt.tight_layout()

    # Save the figure to a bytes buffer
    png = io.BytesIO()
    plt.savefig(png, format='png', dpi=300)
    png.seek(0)
    plt.close()

    return png


Let's debug.

Now, let's implement the GUI:

In [21]:
def _first_uploaded_file(upload_value):
    """Return ``(file_name, content_bytes)`` for the first file of a FileUpload value.

    Two value shapes are accepted:

    * a dict keyed by file name, ``{name: {'content': ...}}`` (ipywidgets 7);
    * a sequence of dicts carrying ``'name'`` and ``'content'`` (ipywidgets 8,
      where the content is a memoryview).

    The content is always returned as ``bytes`` so downstream code can call
    ``.decode()`` on it regardless of the widget version.
    """
    if isinstance(upload_value, dict):
        file_name, file_info = next(iter(upload_value.items()))
    else:
        file_info = upload_value[0]
        file_name = file_info['name']
    return file_name, bytes(file_info['content'])


def create_narrative_analysis_gui():
    """Build and display the interactive narrative-analysis GUI.

    The GUI consists of a file-upload row, a narrative selector with
    "Analyze" and "Extract Problem" buttons, four visualization tabs
    (timeline, plotline, character network, problem formulation), a
    narrative-comparison section, and a shared text output area for status
    messages. All work is delegated to a ``MultiNarrativeAnalyzer`` instance
    created here; this function only wires widgets to analyzer calls.

    Returns:
        None. The assembled widget tree is rendered with ``display``.
    """
    import html  # local import: only needed to escape text rendered as HTML

    def esc(value):
        """Escape *value* for safe interpolation into an HTML widget."""
        return html.escape(str(value))

    # Create the analyzer
    analyzer = MultiNarrativeAnalyzer()

    # Determine supported file formats (DOCX only when python-docx is usable)
    supported_formats = '.json,.txt,.vtt'
    format_description = "Supported formats: JSON, TXT, VTT"

    if DOCX_AVAILABLE:
        supported_formats += ',.docx'
        format_description = "Supported formats: JSON, TXT, DOCX, VTT"

    # ------------------------------------------------------------------
    # Widgets
    # ------------------------------------------------------------------
    header = widgets.HTML(
        value=f"<h1>Computational Narrative Analysis</h1>"
              f"<p>Upload a file containing narrative text for analysis.</p>"
              f"<p><small>{format_description}</small></p>"
    )

    file_upload = widgets.FileUpload(
        accept=supported_formats,
        multiple=False,
        description='Upload File:',
        layout=widgets.Layout(width='300px')
    )

    auto_split_checkbox = widgets.Checkbox(
        value=False,
        description='Auto-split long narratives',
        disabled=False
    )

    narrative_dropdown = widgets.Dropdown(
        options=[],
        description='Narrative:',
        disabled=True,
        layout=widgets.Layout(width='500px')
    )

    analyze_button = widgets.Button(
        description='Analyze',
        disabled=True,
        button_style='primary',
        tooltip='Analyze the narrative',
        icon='check'
    )

    # Button for problem formulation; enabled once an analysis has completed
    problem_button = widgets.Button(
        description='Extract Problem',
        disabled=True,
        button_style='warning',
        tooltip='Extract problem formulation',
        icon='question'
    )

    visualization_tabs = widgets.Tab()
    visualization_tabs.children = [widgets.Output() for _ in range(4)]
    visualization_tabs.set_title(0, 'Timeline')
    visualization_tabs.set_title(1, 'Plotline')
    visualization_tabs.set_title(2, 'Character Network')
    visualization_tabs.set_title(3, 'Problem Formulation')
    timeline_tab, plotline_tab, network_tab, problem_tab = visualization_tabs.children

    # Comparison widgets are named individually so callbacks do not depend
    # on their position inside the VBox.
    compare_select = widgets.SelectMultiple(
        options=[],
        description='Select:',
        disabled=True,
        layout=widgets.Layout(width='500px', height='100px')
    )
    compare_type_dropdown = widgets.Dropdown(
        options=[('Characters', 'characters'), ('Events', 'events'), ('Themes', 'themes')],
        description='Compare:',
        disabled=True
    )
    compare_button = widgets.Button(
        description='Compare',
        disabled=True,
        button_style='info',
        tooltip='Compare selected narratives',
        icon='random'
    )
    compare_output = widgets.Output()

    comparison_section = widgets.VBox([
        widgets.HTML(value="<h3>Compare Narratives</h3>"),
        compare_select,
        compare_type_dropdown,
        compare_button,
        compare_output
    ])

    result_output = widgets.Output()

    # ------------------------------------------------------------------
    # Callbacks
    # ------------------------------------------------------------------
    def on_file_upload_change(change):
        """Load the uploaded file into the analyzer and populate the selectors."""
        # Guard clause: only a non-empty update of `value` carries a file.
        # Without it, file_name would be undefined below.
        if change['type'] != 'change' or change['name'] != 'value' or not change['new']:
            return

        file_name, file_content = _first_uploaded_file(change['new'])

        with result_output:
            clear_output()
            print(f"Processing {file_name}...")

            # Load the data based on file type
            try:
                narrative_ids = analyzer.load_file(file_content, file_name, auto_split_checkbox.value)

                if narrative_ids:
                    print(f"Loaded {len(narrative_ids)} narratives.")

                    # Debug: Show what narratives were loaded
                    for nid in narrative_ids:
                        print(f"  - ID: {nid}, Title: {analyzer.narratives[nid]['title']}")

                    options = [(analyzer.narratives[nid]['title'], nid) for nid in narrative_ids]

                    # Update the narrative dropdown
                    narrative_dropdown.options = options
                    narrative_dropdown.disabled = False
                    analyze_button.disabled = False

                    # Update comparison widgets
                    compare_select.options = options
                    compare_select.disabled = False
                    compare_type_dropdown.disabled = False
                    compare_button.disabled = False
                else:
                    print(f"Failed to load any narratives from the file. Make sure it's a supported format: {format_description}.")
            except Exception as e:
                # UI boundary: report the failure in the output area instead
                # of letting the callback die silently.
                print(f"Error processing file: {str(e)}")
                import traceback
                traceback.print_exc()

    def on_analyze_button_click(b):
        """Run the full analysis pipeline on the selected narrative."""
        narrative_id = narrative_dropdown.value

        if not narrative_id:
            return

        with result_output:
            clear_output()
            print(f"Analyzing {analyzer.narratives[narrative_id]['title']}...")

            # Set as current narrative
            analyzer.set_current_narrative(narrative_id)

            # Preprocessing and event extraction are prerequisites: abort on failure
            if analyzer.preprocess_text():
                print("Text preprocessing complete.")
            else:
                print("Text preprocessing failed.")
                return

            if analyzer.extract_events():
                print(f"Extracted {len(analyzer.narratives[narrative_id]['events'])} events.")
            else:
                print("Event extraction failed.")
                return

            # The three analyses below are independent; a failure in one is
            # reported but does not stop the others.
            if analyzer.analyze_timeline():
                print("Timeline analysis complete.")
            else:
                print("Timeline analysis failed.")

            if analyzer.analyze_plotline():
                print("Plotline analysis complete.")
            else:
                print("Plotline analysis failed.")

            if analyzer.analyze_storyline():
                print("Storyline analysis complete.")
            else:
                print("Storyline analysis failed.")

            # Update visualizations
            update_visualizations(narrative_id)

            # Enable problem formulation button
            problem_button.disabled = False

            print("Analysis complete!")

    def on_problem_button_click(b):
        """Extract and render the problem formulation of the selected narrative."""
        narrative_id = narrative_dropdown.value

        if not narrative_id:
            return

        with result_output:
            clear_output()
            print(f"Extracting problem formulation from {analyzer.narratives[narrative_id]['title']}...")

            # Extract problem formulation
            problem_formulation = analyzer.extract_problem_formulation(narrative_id)

            if not problem_formulation:
                print("Problem formulation extraction failed.")
                return

            print("Problem formulation extracted successfully.")

            # Display problem formulation summary
            print("\nProblem Summary:")
            print(f"- Core Problems: {len(problem_formulation['core_problems'])}")
            print(f"- Stakeholders: {len(problem_formulation['stakeholders'])}")
            print(f"- Constraints: {len(problem_formulation['constraints'])}")
            print(f"- Objectives: {len(problem_formulation['objectives'])}")
            print(f"- Problem Complexity: {problem_formulation['solution_space']['problem_complexity']}")

            # Update problem formulation visualization
            with problem_tab:
                clear_output()
                problem_img = analyzer.visualize_problem_formulation(narrative_id)
                if not problem_img:
                    print("No problem formulation visualization available.")
                    return

                display(Image(data=problem_img.getvalue()))

                # Show detailed problem information. Every value below comes
                # from the uploaded narrative, so it is escaped before being
                # placed into HTML.
                display(widgets.HTML(value="<h3>Problem Details</h3>"))

                # Core problems
                if problem_formulation['core_problems']:
                    display(widgets.HTML(value="<h4>Core Problems:</h4>"))
                    for i, problem in enumerate(problem_formulation['core_problems']):
                        display(widgets.HTML(
                            value=f"<p><b>Problem {i+1}:</b> {esc(problem['description'])}<br>"
                                  f"<b>Type:</b> {esc(problem['type'])}, <b>Severity:</b> {esc(problem['severity'])}</p>"
                        ))

                # Stakeholders
                if problem_formulation['stakeholders']:
                    display(widgets.HTML(value="<h4>Key Stakeholders:</h4>"))
                    for stakeholder, data in problem_formulation['stakeholders'].items():
                        interests = ", ".join(data['interests']) if data['interests'] else "Unknown"
                        display(widgets.HTML(
                            value=f"<p><b>{esc(stakeholder)}</b> (Importance: {esc(data['importance'])})<br>"
                                  f"<b>Interests:</b> {esc(interests)}</p>"
                        ))

                # Objectives
                if problem_formulation['objectives']:
                    display(widgets.HTML(value="<h4>Objectives:</h4>"))
                    for i, objective in enumerate(problem_formulation['objectives']):
                        confidence = f", Confidence: {objective['confidence']:.2f}" if 'confidence' in objective else ""
                        display(widgets.HTML(
                            value=f"<p><b>Objective {i+1}:</b> {esc(objective['description'])}<br>"
                                  f"<b>Stakeholder:</b> {esc(objective.get('stakeholder', 'Unknown'))}, "
                                  f"<b>Type:</b> {esc(objective['type'])}{confidence}</p>"
                        ))

    def update_visualizations(narrative_id):
        """Refresh the first three tabs and reset the problem-formulation tab."""
        # (tab, analyzer method producing the image buffer, fallback message)
        renderers = [
            (timeline_tab, analyzer.visualize_timeline, "No timeline data available."),
            (plotline_tab, analyzer.visualize_plotline, "No plotline data available."),
            (network_tab, analyzer.visualize_storyline, "No character network data available."),
        ]
        for tab, render, fallback in renderers:
            with tab:
                clear_output()
                img = render(narrative_id)
                if img:
                    display(Image(data=img.getvalue()))
                else:
                    print(fallback)

        # Clear problem formulation tab (will be populated when problem button is clicked)
        with problem_tab:
            clear_output()
            print("Click 'Extract Problem' to generate problem formulation.")

    def on_compare_button_click(b):
        """Compare the selected narratives along the chosen dimension."""
        selected_narratives = list(compare_select.value)
        comparison_type = compare_type_dropdown.value

        with compare_output:
            clear_output()

            if len(selected_narratives) < 2:
                print("Please select at least two narratives to compare.")
                return

            print(f"Comparing {len(selected_narratives)} narratives...")

            comparison_img = analyzer.compare_narratives(selected_narratives, comparison_type)
            if comparison_img:
                display(Image(data=comparison_img.getvalue()))
            else:
                print("Comparison failed. Make sure all narratives have been analyzed.")

    # ------------------------------------------------------------------
    # Wiring and layout
    # ------------------------------------------------------------------
    # Observe only the `value` trait so the callback is not dispatched for
    # unrelated trait changes of the upload widget.
    file_upload.observe(on_file_upload_change, names='value')
    analyze_button.on_click(on_analyze_button_click)
    problem_button.on_click(on_problem_button_click)
    compare_button.on_click(on_compare_button_click)

    app = widgets.VBox([
        header,
        widgets.HBox([file_upload, auto_split_checkbox]),
        widgets.HBox([narrative_dropdown, analyze_button, problem_button]),
        visualization_tabs,
        comparison_section,
        result_output
    ])

    display(app)

# Run the application: builds the widget tree and displays it as this cell's output
create_narrative_analysis_gui()


VBox(children=(HTML(value='<h1>Computational Narrative Analysis</h1><p>Upload a file containing narrative text…

This tool can be very useful for literary analysis, storytelling research, or educational purposes. Users can now upload JSON files containing narrative text and get detailed insights into the timeline, plotline, and storyline structures.
The key change is replacing NarrativeAnalyzer() with MultiNarrativeAnalyzer() in the first line of the function. This ensures that the GUI uses your new class that supports multiple narratives.

I've also made some improvements to the GUI:

Added a checkbox for auto-splitting long narratives
Enhanced the comparison section to allow selecting multiple narratives
Added more informative status messages during processing
Improved error handling throughout the interface
This updated GUI should work correctly with your MultiNarrativeAnalyzer class.

If you want to further enhance the tool in the future, you might consider:

Adding support for more file formats beyond the JSON, TXT, DOCX, and VTT formats already handled
Implementing more advanced NLP techniques for better event extraction
Adding sentiment analysis to track emotional arcs in narratives
Creating more sophisticated visualizations
Extending the narrative comparison beyond the current character, event, and theme comparisons

File Size and Formatting Guidelines for the Narrative Analysis Tool

Great question! Here are the key considerations regarding file size and formatting for the different file types supported by the tool:

File Size Limitations

Jupyter Notebook Environment Limits:

Most Jupyter environments have a default upload limit of around 25MB
For larger files, you might encounter browser memory issues
Processing Limitations:

Very large narratives (>100,000 words) may cause performance issues
The auto-split feature helps with this by breaking down large texts into ~1,000 word chunks
Memory Considerations:

Complex analyses (especially character networks) require more memory
Visualizations for very large narratives may be slow to generate
Formatting Guidelines by File Type

JSON Files
Expected Structure:
{
  "narratives": [
    {
      "id": "1",
      "title": "Narrative Title",
      "text": "Full narrative text goes here..."
    },
    {
      "id": "2",
      "title": "Another Narrative",
      "text": "Another narrative text..."
    }
  ]
}

Alternative Structure (also accepted):
[
  {
    "id": "1",
    "title": "Narrative Title",
    "text": "Full narrative text goes here..."
  }
]

The tool will look for either a "narratives" array or a direct array of narrative objects
TXT Files
Plain text files with narrative content
Best Practices:
Use proper paragraphs separated by line breaks
Include proper punctuation for better sentence detection
Avoid special formatting characters that might interfere with text processing
DOCX Files (if python-docx is installed)
Standard Microsoft Word documents
Supported Elements:
Regular paragraphs of text
Basic formatting is ignored (bold, italic, etc.)
Unsupported Elements:
Tables, images, and other complex elements are not extracted
Headers/footers are not included in the extracted text
Comments and tracked changes are not processed
VTT Files (WebVTT Subtitles)
Standard WebVTT subtitle format
Requirements:
Must include the "WEBVTT" header
Should follow standard timestamp format (HH:MM:SS.mmm --> HH:MM:SS.mmm)
Processing Notes:
Timestamps are ignored, only the text content is extracted
Speaker identifications (if present) are preserved in the text
Best Practices for Optimal Results

Text Quality:

Well-formed sentences with proper punctuation yield better event extraction
Clear character references (consistent naming) improve character network analysis
Chronological narratives work best for timeline visualization
Narrative Structure:

Narratives with clear plot structures (setup, conflict, resolution) work best
Character-driven narratives with interactions produce better network graphs
Problem-centric narratives yield better problem formulations
Pre-processing Tips:

For very long texts, enable the "Auto-split" option
Clean up any formatting artifacts before uploading
For subtitle files, consider pre-merging related lines for better sentence structure
Performance Optimization:

Start with smaller narratives to test the system
For comparative analysis, limit to 3-5 narratives at a time
Allow sufficient time for complex analyses to complete
The tool is designed to be flexible with input formats, but following these guidelines will help ensure the best analysis results. If you encounter any specific issues with file processing, the error messages in the output area should provide guidance on how to resolve them.

In [13]:
# Add this cell to your notebook to debug file loading issues

def debug_file_upload(change):
    """Print diagnostic information about a file selected in a FileUpload widget.

    Intended as an ``observe`` callback. Changes that are not a non-empty
    update of the ``value`` trait are ignored.

    Both FileUpload value shapes are handled: the dict keyed by file name
    (ipywidgets 7) and the sequence of dicts with ``'name'``/``'content'``
    entries (ipywidgets 8, where the content is a memoryview).

    Args:
        change: The change dictionary passed by ``observe``; its ``'type'``,
            ``'name'`` and ``'new'`` entries are read.
    """
    if change['type'] != 'change' or change['name'] != 'value' or not change['new']:
        return

    # Get the first uploaded file, whatever the widget version
    uploads = change['new']
    if isinstance(uploads, dict):
        file_name, file_info = next(iter(uploads.items()))
    else:
        file_info = uploads[0]
        file_name = file_info['name']
    # memoryview has no .decode(); normalise to bytes once
    file_content = bytes(file_info['content'])

    print(f"Debug: File uploaded: {file_name}")
    print(f"Debug: File size: {len(file_content)} bytes")

    # Check file type
    file_extension = file_name.split('.')[-1].lower()
    print(f"Debug: File extension: {file_extension}")

    # Only text-based files get a preview / JSON inspection
    if file_extension not in ('txt', 'json', 'vtt'):
        return

    # Decode once; a non-UTF-8 file is reported instead of raising later
    # in the JSON branch.
    try:
        text = file_content.decode('utf-8')
    except UnicodeDecodeError:
        print("Debug: Could not decode file content as UTF-8")
        return
    print(f"Debug: Content preview: {text[:200]}...")

    # For JSON files, try to parse
    if file_extension == 'json':
        try:
            json_data = json.loads(text)
        except json.JSONDecodeError as e:
            print(f"Debug: JSON parsing error: {e}")
            return

        if isinstance(json_data, dict) and 'narratives' in json_data:
            print(f"Debug: JSON contains 'narratives' key with {len(json_data['narratives'])} items")
        elif isinstance(json_data, list):
            print(f"Debug: JSON contains a list with {len(json_data)} items")
        else:
            print(f"Debug: JSON structure: {type(json_data)}")

# Create a test upload widget (single file; same extensions the main GUI can accept)
test_upload = widgets.FileUpload(
    accept='.json,.txt,.docx,.vtt',
    multiple=False,
    description='Test Upload:',
)

# Connect the debug callback. Observing only `value` keeps the callback from
# being dispatched for unrelated trait changes of the widget.
test_upload.observe(debug_file_upload, names='value')

# Display the test widget
display(test_upload)


FileUpload(value=(), accept='.json,.txt,.docx,.vtt', description='Test Upload:')