Experiment 1
In [ ]:
Copied!
# Ref : pipeline_6a_trisociation_base
# Ref : pipeline_6a_trisociation_base
In [1]:
Copied!
# imports
from pathlib import Path
from openai import OpenAI
from dotenv import load_dotenv
import os
import time
import pandas as pd
from collections import defaultdict
import re
# imports
from pathlib import Path
from openai import OpenAI
from dotenv import load_dotenv
import os
import time
import pandas as pd
from collections import defaultdict
import re
In [2]:
Copied!
# load API key
# Location of the .env file that defines OPENAI_API_KEY.
# TODO: adjust this path if your .env file does not sit next to the notebook.
dotenv_path = Path(".env")
load_dotenv(dotenv_path=dotenv_path)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Fail here with a clear message rather than later, inside the first API call.
if not OPENAI_API_KEY:
    raise RuntimeError(
        f"OPENAI_API_KEY is not set; define it in {dotenv_path} or in the environment."
    )
In [3]:
Copied!
# Model identifiers used by the experiment.
# The alias is a moving target (it points to the latest GPT model), whereas
# GPT_MODEL_TEXT names one fixed model.
GPT_MODEL_TEXT_ALIAS = "gpt-4-turbo-preview"
GPT_MODEL_TEXT = "gpt-4-0125-preview"
In [4]:
Copied!
# Experiment size:
#   n    - number of trisociations requested in a single run
#   runs - number of independent runs
n = 100
runs = 3
In [5]:
Copied!
# Base prompt: asks for `n` numbered "Word - Word - Word" lines.
# The empty first and last entries reproduce the leading and trailing newline
# of the prompt text.
base_prompt = "\n".join([
    "",
    f"Generate {n} trisociations in the format below. No additional comments or elaborations. Avoid duplicate words across trisociations.",
    "1. Word - Word - Word",
    "2. Word - Word - Word",
    "...",
    "",
])
In [6]:
Copied!
# Instantiate the API client.
# The key loaded from the .env file is passed explicitly so it is visible which
# credential this notebook uses.
client = OpenAI(api_key=OPENAI_API_KEY)
In [7]:
Copied!
# Function to perform a single run and collect data
def perform_run(prompt_text, model=None, api_client=None):
    """Send one prompt to the chat-completions API and collect timing and token usage.

    Args:
        prompt_text: Text sent as the single system message of the request.
        model: Model name for the request. Defaults to the notebook-level
            GPT_MODEL_TEXT_ALIAS when omitted.
        api_client: Object exposing ``chat.completions.create``. Defaults to
            the notebook-level ``client`` when omitted.

    Returns:
        dict with the keys 'execution_time' (elapsed seconds for the request),
        'prompt_tokens', 'completion_tokens', 'total_tokens' (as reported in
        the response's ``usage``) and 'trisociations' (text of the first
        choice; an empty string if the response carries no content).
    """
    # Resolve the defaults at call time so the notebook globals are only
    # needed when the caller does not supply an override.
    if model is None:
        model = GPT_MODEL_TEXT_ALIAS
    if api_client is None:
        api_client = client

    # perf_counter is a monotonic clock, so the measured duration is not
    # affected by system clock adjustments during the request.
    start_time = time.perf_counter()
    completion = api_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": prompt_text},
        ],
    )
    execution_time = time.perf_counter() - start_time

    usage = completion.usage
    return {
        'execution_time': execution_time,
        'prompt_tokens': usage.prompt_tokens,
        'completion_tokens': usage.completion_tokens,
        'total_tokens': usage.total_tokens,
        # Downstream parsing calls .strip() on this value, so never hand back None.
        'trisociations': completion.choices[0].message.content or "",
    }
In [8]:
Copied!
# Accumulates one result dict per run
results = []

# Execute the configured number of runs; extend() appends each result as soon
# as its run finishes.
results.extend(perform_run(base_prompt) for _ in range(runs))

# Echo the raw trisociations returned by every run
for run_number, run_result in enumerate(results, start=1):
    print(f"Run {run_number} trisociations:\n{run_result['trisociations']}\n")
Run 1 trisociations: 1. Sunshine - Riddle - Lighthouse 2. Glacier - Equation - Cobweb 3. Symphony - Giraffe - Penthouse 4. Blueprint - Tulip - Avalanche 5. Camel - Fountain - Blackboard 6. Thunderstorm - Encyclopedia - Parachute 7. Chameleon - Volcano - Library 8. Sailboat - Prism - Beethoven 9. Windmill - Kangaroo - Fossil 10. Sequoia - Pyramid - Telescope 11. Harmonica - Oasis - Chessboard 12. Canoe - Meteor - Sculpture 13. Tornado - Flamingo - Opera 14. Coral - Puzzle - Cathedral 15. Bison - Candle - Spacesuit 16. Bamboo - Hurricane - Palette 17. Sundial - Elephant - Typewriter 18. Glacier - Circuit - Throne 19. Pegasus - Waterfall - Sonnet 20. Lantern - Cactus - Helicopter 21. Neon - Dune - Saxophone 22. Kite - Sequencer - Crocodile 23. Fedora - Rainbow - Monastery 24. Acorn - Starship - Mosaic 25. Mirage - Raven - Keyboard 26. Dew - Sphinx - Cellar 27. Satellite - Gazelle - Lullaby 28. Willow - Molecule - Gate 29. Lyric - Moss - Zeppelin 30. Geyser - Porcupine - Harp 31. Reef - Silhouette - Carousel 32. Eagle - Atom - Theatre 33. Motif - Iceberg - Salamander 34. Dragonfly - Circuitry - Novel 35. Halo - Cliff - Tuba 36. Lotus - Avalanche - Diary 37. Cavern - Tarantula - Ballet 38. Summit - Tornado - Violin 39. Nebula - Camel - Rhapsody 40. Archipelago - Coyote - Fountain Pen 41. Panther - Quasar - Windchime 42. Ocarina - Pyramid - Willow 43. Lantern - Badger - Sonnet 44. Thunder - Labyrinth - Harpsichord 45. Constellation - Dolphin - Library 46. Zephyr - Crane - Sermon 47. Sequoia - Oasis - Compass 48. Lynx - Cliffside - Trombone 49. Obsidian - Rainforest - Novella 50. Sandstone - Lavender - Chapel 51. Torrent - Sequencer - Gondola 52. Asteroid - Hedgehog - Cantata 53. Dandelion - Sphynx - Saxophone 54. Arrowhead - Tundra -Script 55. Tesseract - Wheat - Abbey 56. Albatross - Castle - Algorithm 57. Brocade - Mesa - Mandolin 58. Eclipse - Peacock - Mansion 59. Fern - Pantheon - Oboe 60. Ivy - Aquarium - Sonata 61. Jupiter - Sequoia - Scroll 62. 
Kraken - Sahara - Banquet 63. Lagoon - Basilica - Timpani 64. Mahogany - Raptor - Tablet 65. Nimbus - Colosseum - Flute 66. Obsidian - Pelican - Aria 67. Pinnacle - Savannah - Harlequin 68. Quartz - Tundra - Concerto 69. Riptide - Orchard - Tapestry 70. Saffron - Glacier - Lecture 71. Tamarind - Prairie - Epistle 72. Umbrella - Jungle - Ballad 73. Vanguard - Reef - Discourse 74. Watchtower - Panther - Madrigal 75. Xylophone - Canyon - Treatise 76. Yacht - Serengeti - Anthem 77. Zephyr - Archipelago - Canticle 78. Acropolis - Beetle - Rondo 79. Badlands - Crane - Sonatina 80. Cirrus - Chaparral - Novella 81. Delta - Egret - Minuet 82. Expanse - Falcon - Epilog 83. Fjord - Gorge - Overture 84. Grove - Heron - Prelude 85. Highland - Ibis - Recital 86. Inlet - Jackal - Saga 87. Jungle - Kestrel - Shanty 88. Knoll - Lemming - Sketch 89. Ledge - Mongoose - Tale 90. Moor - Nighthawk - Verse 91. Nook - Ocelot - Vignette 92. Outcrop - Puffin - Wordplay 93. Plateau - Quail - Yarn 94. Quarry - Raven - Narrative 95. Ridge - Serval - Lyric 96. Shoal - Tern - Monologue 97. Tundra - Urial - Dialogue 98. Upland - Vole - Sonnet 99. Vale - Warbler - Jingle 100. Wetland - Xerus - Rhyme Run 2 trisociations: 1. Mountain - Friendship - Calculator 2. Ocean - Candle - Conspiracy 3. Guitar - Volcano - Mailbox 4. Library - Elephant - Skateboard 5. Thunder - Necklace - Algorithm 6. Rainbow - Secret - Refrigerator 7. Castle - Butterfly - Equation 8. Cloud - Compass - Festival 9. Sunflower - Whisper - Avalanche 10. Dream - Saddle - Telescope 11. Candlelight - Poetry - Hammer 12. Dolphin - Shadow - Garage 13. Comet - Spice - Fountain 14. Iceberg - Legend - Notebook 15. Prism - Courage - Kitchen 16. Storm - Keyhole - Balloon 17. Forest - Puzzle - Window 18. Horizon - Lantern - Carpet 19. Moonlight - Detective - Strawberry 20. Whisper - Clockwork - Tattoo 21. Planet - Bridge - Sandal 22. Firefly - Mystery - Pillow 23. Glacier - Silk - Orchestra 24. Sunrise - Locket - Typhoon 25. 
Snowflake - Paradox - Theatre 26. Maze - Arrow - Cupcake 27. Starship - Ink - Harvest 28. Lighthouse - Script - Dandelion 29. Oceanic - Cipher - Chandelier 30. Galaxy - Canvas - Perfume 31. Desert - Ribbon - Laboratory 32. Tornado - Mirror - Chocolate 33. Cliff - Novel - Magnet 34. Aurora - Pathway - Lemonade 35. Thunderstorm - Charm - Carousel 36. Eclipse - Heirloom - Domino 37. Neon - Fable - Stopwatch 38. Waterfall - Feather - Labyrinth 39. Comet - Dynasty - Fingerprint 40. Mirage - Lock - Sundial 41. Breeze - Chronicle - Chess 42. Twilight - Medal - Beacon 43. Vortex - Script - Bonsai 44. Quasar - Flask - Tapestry 45. Serenade - Ruins - Torch 46. Vineyard - Prism - Novelty 47. Tide - Cavern - Palette 48. Summit - Quill - Lantern 49. Mist - Pioneer - Mandala 50. Canyon - Whisper - Brooch 51. Sapphire - Echo - Diary 52. Zephyr - Gateway - Trinket 53. Oasis - Legend - Mosaic 54. Aurora - Chronicle - Tambourine 55. Hurricane - Map - Locket 56. Blizzard - Scroll - Pebble 57. Meadow - Hourglass - Scale 58. Dusk - Beacon - Orchard 59. Monsoon - Cipher - Plume 60. Cosmos - Goblet - Kite 61. Vortex - Shard - Quiver 62. Dew - Relic - Banquet 63. Zenith - Portal - Viola 64. Solstice - Dagger - Canopy 65. Mirage - Tribute - Wagon 66. Tundra - Pilgrim - Sundae 67. Pinnacle - Silhouette - Chisel 68. Geyser - Chronicle - Cradle 69. Oasis - Mariner - Hive 70. Nebula - Loom - Minaret 71. Dune - Helm - Cauldron 72. Crest - Oracle - Flute 73. Shoal - Enigma - Locket 74. Tether - Odyssey - Cloak 75. Zephyr - Amulet - Gala 76. Delta - Specter - Hearth 77. Apex - Fable - Anvil 78. Void - Laurel - Mirth 79. Ethereal - Sage - Canteen 80. Arch - Glimpse - Tambour 81. Quiver - Solace - Pyre 82. Nexus - Spell - Hatchet 83. Opal - Pilgrimage - Forge 84. Sentinel - Mirage - Lute 85. Zenith - Keepsake - Hatch 86. Pinnacle - Drifter - Kettle 87. Cipher - Maelstrom - Opus 88. Oasis - Alchemy - Quilt 89. Nectar - Escapade - Scepter 90. Gale - Heirloom - Portico 91. 
Summit - Ballad - Crucible 92. Aurora - Vestige - Spire 93. Nexus - Serenity - Pavilion 94. Beacon - Shard - Grotto 95. Twilight - Mantle - Spindle 96. Summit - Orchard - Loom 97. Ethereal - Fang - Piston 98. Celestial - Scroll - Pilaster 99. Mirage - Enclave - Carafe 100. Zephyr - Canopy - Mosaic Run 3 trisociations: 1. Sun - Ocean - Wind 2. Book - Pencil - Table 3. Apple - Orange - Banana 4. Cat - Dog - Bird 5. Chair - Sofa - Lamp 6. Moon - Star - Planet 7. Ice - Water - Steam 8. Hat - Coat - Scarf 9. Car - Truck - Motorcycle 10. Grass - Tree - Flower 11. Bread - Cheese - Wine 12. Mountain - Valley - River 13. Fish - Crab - Shark 14. Computer - Tablet - Phone 15. Guitar - Piano - Drum 16. Painting - Sculpture - Mosaic 17. Gold - Silver - Copper 18. Elephant - Lion - Giraffe 19. Cloud - Rain - Lightning 20. Sand - Rock - Dirt 21. Milk - Coffee - Tea 22. Sugar - Salt - Pepper 23. Hammer - Saw - Drill 24. Clock - Watch - Calendar 25. Candle - Lamp - Flashlight 26. Sock - Shoe - Boot 27. Train - Plane - Ship 28. Key - Lock - Door 29. Leaf - Stem - Root 30. Soap - Towel - Water 31. Soccer - Basketball - Tennis 32. Bookshelf - Drawer - Closet 33. Knife - Fork - Spoon 34. Jacket - Shirt - Pants 35. Wallet - Purse - Backpack 36. Egg - Bacon - Pancake 37. Glacier - Desert - Jungle 38. Stove - Oven - Microwave 39. Rice - Pasta - Bread 40. Mirror - Window - Glass 41. Honey - Jelly - Syrup 42. Bee - Butterfly - Ant 43. Kite - Balloon - Helicopter 44. Owl - Hawk - Eagle 45. Cheese - Yoghurt - Butter 46. Mango - Papaya - Pineapple 47. Shampoo - Conditioner - Soap 48. Frog - Turtle - Snake 49. Horse - Cow - Sheep 50. Diary - Letter - Postcard 51. Sponge - Brush - Rag 52. Violin - Cello - Bass 53. Cherry - Grape - Plum 54. Statue - Fountain - Bench 55. Pear - Peach - Kiwi 56. Screw - Nail - Bolt 57. Ruler - Compass - Protractor 58. Fox - Wolf - Bear 59. Zebra - Hippo - Rhino 60. Seagull - Pigeon - Sparrow 61. Marble - Granite - Slate 62. Lake - Pond - Stream 63. 
Whistle - Bell - Horn 64. Onion - Garlic - Ginger 65. Pea - Corn - Carrot 66. Lighthouse - Tower - Castle 67. Lipstick - Eyeshadow - Mascara 68. Bat - Rat - Cat 69. Ladder - Stairs - Elevator 70. Cinema - Theatre - Museum 71. Curry - Sushi - Pizza 72. Tornado - Hurricane - Tsunami 73. Skateboard - Scooter - Bicycle 74. Harp - Flute - Clarinet 75. Oyster - Mussel - Clam 76. Cucumber - Tomato - Lettuce 77. Chess - Checkers - Backgammon 78. Eraser - Marker - Pencil 79. Sunflower - Daisy - Rose 80. Mustard - Ketchup - Mayonnaise 81. Doll - Puzzle - Yo-yo 82. Oak - Maple - Birch 83. Mittens - Gloves - Scarves 84. Duck - Goose - Swan 85. Hammerhead - Great White - Whale shark 86. Almond - Cashew - Peanut 87. Quilt - Blanket - Pillow 88. Falcon - Robin - Hummingbird 89. Acorn - Walnut - Chestnut 90. Lime - Lemon - Orange 91. Crocodile - Alligator - Lizard 92. Pedal - Wheel - Handlebar 93. Lantern - Candle - Torch 94. Cactus - Fern - Ivy 95. Skirt - Dress - Blouse 96. Goldfish - Koi - Guppy 97. Crib - Bunk bed - Hammock 98. Iguana - Chameleon - Gecko 99. Shovel - Hoe - Rake 100. Snail - Slug - Worm
In [9]:
Copied!
# function to calculate and display per run time and token metrics
def calculate_and_display_metrics(results):
# Prepare data for DataFrame
data = [{
'Execution Time': result['execution_time'],
'Prompt Tokens': result['prompt_tokens'],
'Completion Tokens': result['completion_tokens'],
'Total Tokens': result['total_tokens']
} for result in results]
# Create DataFrame
df_metrics = pd.DataFrame(data)
# Set custom index names (Run 1, Run 2, ...)
run_indices = [f'Run {i+1}' for i in range(len(results))]
df_metrics.index = run_indices
# Apply styling for better readability
styled_df = df_metrics.style.format({
'Execution Time': "{:.2f} seconds",
'Prompt Tokens': "{:.0f}",
'Completion Tokens': "{:.0f}",
'Total Tokens': "{:.0f}"
}).background_gradient(cmap='viridis', subset=['Total Tokens'])
return styled_df
metrics_df = calculate_and_display_metrics(results)
metrics_df
# function to calculate and display per run time and token metrics
def calculate_and_display_metrics(results):
# Prepare data for DataFrame
data = [{
'Execution Time': result['execution_time'],
'Prompt Tokens': result['prompt_tokens'],
'Completion Tokens': result['completion_tokens'],
'Total Tokens': result['total_tokens']
} for result in results]
# Create DataFrame
df_metrics = pd.DataFrame(data)
# Set custom index names (Run 1, Run 2, ...)
run_indices = [f'Run {i+1}' for i in range(len(results))]
df_metrics.index = run_indices
# Apply styling for better readability
styled_df = df_metrics.style.format({
'Execution Time': "{:.2f} seconds",
'Prompt Tokens': "{:.0f}",
'Completion Tokens': "{:.0f}",
'Total Tokens': "{:.0f}"
}).background_gradient(cmap='viridis', subset=['Total Tokens'])
return styled_df
# Build the styled per-run metrics table from `results`; the bare name on the
# last line makes the notebook render it as the cell output.
metrics_df = calculate_and_display_metrics(results)
metrics_df
Out[9]:
 | Execution Time | Prompt Tokens | Completion Tokens | Total Tokens |
---|---|---|---|---|
Run 1 | 37.30 seconds | 53 | 1031 | 1084 |
Run 2 | 41.59 seconds | 53 | 957 | 1010 |
Run 3 | 36.55 seconds | 53 | 905 | 958 |
In [10]:
Copied!
# function to get duplicate stats per run
def per_run_aggregate_summary(results):
detailed_data = []
for run_index, result in enumerate(results, start=1):
trisociations = result['trisociations'].strip()
trisociation_list = trisociations.split('\n')
for trisociation_index, trisociation in enumerate(trisociation_list, start=1):
words = re.sub(r'^\d+\.\s*', '', trisociation).replace(' - ', ' ').split()
for word in words:
detailed_data.append({'run': run_index, 'word': word})
df = pd.DataFrame(detailed_data)
summary_stats = []
unique_runs = df['run'].unique()
for run in sorted(unique_runs):
run_df = df[df['run'] == run]
total_words = run_df.shape[0]
total_unique_words = run_df['word'].nunique()
total_duplicates = run_df[run_df.duplicated('word', keep=False)]['word'].nunique()
if not run_df[run_df.duplicated('word', keep=False)].empty:
mode_duplicate = run_df[run_df.duplicated('word', keep=False)]['word'].value_counts().idxmax()
max_dup_freq = run_df[run_df.duplicated('word', keep=False)]['word'].value_counts().max()
modal_dup_freq = run_df[run_df.duplicated('word', keep=False)]['word'].value_counts().mode()[0]
else:
mode_duplicate = 'None'
max_dup_freq = 0
modal_dup_freq = 0
min_dup_freq = run_df[run_df.duplicated('word', keep=False)]['word'].value_counts().min() if not run_df[run_df.duplicated('word', keep=False)].empty else 0
summary_stats.append({
'Total Words': total_words,
'Total Unique Words': total_unique_words,
'Total Duplicates': total_duplicates,
'Max Dup Word': mode_duplicate,
'Max Dup Freq': max_dup_freq,
'Modal Dup Freq': modal_dup_freq,
'Min Dup Freq': min_dup_freq
})
summary_df = pd.DataFrame(summary_stats, index=[f'Run {run}' for run in sorted(unique_runs)])
# Apply styling
styled_df = summary_df.style\
.format({'Total Words': "{:}", 'Total Unique Words': "{:}", 'Total Duplicates': "{:}",
'Max Dup Word': "{}", 'Max Dup Freq': "{:}", 'Modal Dup Freq': "{:}", 'Min Dup Freq': "{:}"})\
.set_table_styles([{
'selector': 'th',
'props': [
('background-color', '#f4f4f4'),
('color', '#6d6d6d'),
('font-weight', 'bold')
]}])\
.set_caption("Summary Statistics per Run")\
.set_properties(**{'text-align': 'left'})
return styled_df
# display styled DataFrame
per_run_aggregate_summary_styled = per_run_aggregate_summary(results)
per_run_aggregate_summary_styled
# function to get duplicate stats per run
def per_run_aggregate_summary(results):
detailed_data = []
for run_index, result in enumerate(results, start=1):
trisociations = result['trisociations'].strip()
trisociation_list = trisociations.split('\n')
for trisociation_index, trisociation in enumerate(trisociation_list, start=1):
words = re.sub(r'^\d+\.\s*', '', trisociation).replace(' - ', ' ').split()
for word in words:
detailed_data.append({'run': run_index, 'word': word})
df = pd.DataFrame(detailed_data)
summary_stats = []
unique_runs = df['run'].unique()
for run in sorted(unique_runs):
run_df = df[df['run'] == run]
total_words = run_df.shape[0]
total_unique_words = run_df['word'].nunique()
total_duplicates = run_df[run_df.duplicated('word', keep=False)]['word'].nunique()
if not run_df[run_df.duplicated('word', keep=False)].empty:
mode_duplicate = run_df[run_df.duplicated('word', keep=False)]['word'].value_counts().idxmax()
max_dup_freq = run_df[run_df.duplicated('word', keep=False)]['word'].value_counts().max()
modal_dup_freq = run_df[run_df.duplicated('word', keep=False)]['word'].value_counts().mode()[0]
else:
mode_duplicate = 'None'
max_dup_freq = 0
modal_dup_freq = 0
min_dup_freq = run_df[run_df.duplicated('word', keep=False)]['word'].value_counts().min() if not run_df[run_df.duplicated('word', keep=False)].empty else 0
summary_stats.append({
'Total Words': total_words,
'Total Unique Words': total_unique_words,
'Total Duplicates': total_duplicates,
'Max Dup Word': mode_duplicate,
'Max Dup Freq': max_dup_freq,
'Modal Dup Freq': modal_dup_freq,
'Min Dup Freq': min_dup_freq
})
summary_df = pd.DataFrame(summary_stats, index=[f'Run {run}' for run in sorted(unique_runs)])
# Apply styling
styled_df = summary_df.style\
.format({'Total Words': "{:}", 'Total Unique Words': "{:}", 'Total Duplicates': "{:}",
'Max Dup Word': "{}", 'Max Dup Freq': "{:}", 'Modal Dup Freq': "{:}", 'Min Dup Freq': "{:}"})\
.set_table_styles([{
'selector': 'th',
'props': [
('background-color', '#f4f4f4'),
('color', '#6d6d6d'),
('font-weight', 'bold')
]}])\
.set_caption("Summary Statistics per Run")\
.set_properties(**{'text-align': 'left'})
return styled_df
# Build the styled duplicate-word summary for all runs; the bare name on the
# last line makes the notebook render it as the cell output.
per_run_aggregate_summary_styled = per_run_aggregate_summary(results)
per_run_aggregate_summary_styled
Out[10]:
 | Total Words | Total Unique Words | Total Duplicates | Max Dup Word | Max Dup Freq | Modal Dup Freq | Min Dup Freq |
---|---|---|---|---|---|---|---|
Run 1 | 301 | 272 | 25 | Glacier | 3 | 2 | 2 |
Run 2 | 300 | 259 | 30 | Mirage | 4 | 2 | 2 |
Run 3 | 303 | 294 | 9 | Pencil | 2 | 2 | 2 |