diff --git a/package-lock.json b/package-lock.json index 4d569ac0d4..54624dadff 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,7 +16,7 @@ "astro": "^4.4.1", "astro-compress": "^2.2.10", "astrojs-service-worker": "^2.0.0", - "autoprefixer": "^10.4.19", + "autoprefixer": "^10.4.20", "cheerio": "^1.0.0-rc.12", "cssnano": "^6.1.2", "fuse.js": "^7.0.0", @@ -5672,9 +5672,9 @@ } }, "node_modules/autoprefixer": { - "version": "10.4.19", - "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.19.tgz", - "integrity": "sha512-BaENR2+zBZ8xXhM4pUaKUxlVdxZ0EZhjvbopwnXmxRUfqDmwSpC2lAi/QXvx7NRdPCo1WKEcEF6mV64si1z4Ew==", + "version": "10.4.20", + "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.20.tgz", + "integrity": "sha512-XY25y5xSv/wEoqzDyXXME4AFfkZI0P23z6Fs3YgymDnKJkCGOnkL0iTxCa85UTqaSgfcqyf3UA6+c7wUvx/16g==", "funding": [ { "type": "opencollective", @@ -5690,11 +5690,11 @@ } ], "dependencies": { - "browserslist": "^4.23.0", - "caniuse-lite": "^1.0.30001599", + "browserslist": "^4.23.3", + "caniuse-lite": "^1.0.30001646", "fraction.js": "^4.3.7", "normalize-range": "^0.1.2", - "picocolors": "^1.0.0", + "picocolors": "^1.0.1", "postcss-value-parser": "^4.2.0" }, "bin": { @@ -6016,9 +6016,9 @@ } }, "node_modules/browserslist": { - "version": "4.23.0", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.0.tgz", - "integrity": "sha512-QW8HiM1shhT2GuzkvklfjcKDiWFXHOeFCIA/huJPwHsslwcydgk7X+z2zXpEijP98UCY7HbubZt5J2Zgvf0CaQ==", + "version": "4.24.2", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.2.tgz", + "integrity": "sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg==", "funding": [ { "type": "opencollective", @@ -6034,10 +6034,10 @@ } ], "dependencies": { - "caniuse-lite": "^1.0.30001587", - "electron-to-chromium": "^1.4.668", - "node-releases": "^2.0.14", - "update-browserslist-db": "^1.0.13" + "caniuse-lite": "^1.0.30001669", + 
"electron-to-chromium": "^1.5.41", + "node-releases": "^2.0.18", + "update-browserslist-db": "^1.1.1" }, "bin": { "browserslist": "cli.js" @@ -6169,9 +6169,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001600", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001600.tgz", - "integrity": "sha512-+2S9/2JFhYmYaDpZvo0lKkfvuKIglrx68MwOBqMGHhQsNkLjB5xtc/TGoEPs+MxjSyN/72qer2g97nzR641mOQ==", + "version": "1.0.30001684", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001684.tgz", + "integrity": "sha512-G1LRwLIQjBQoyq0ZJGqGIJUXzJ8irpbjHLpVRXDvBEScFJ9b17sgK6vlx0GAJFE21okD7zXl08rRRUfq6HdoEQ==", "funding": [ { "type": "opencollective", @@ -8365,9 +8365,9 @@ } }, "node_modules/electron-to-chromium": { - "version": "1.4.677", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.677.tgz", - "integrity": "sha512-erDa3CaDzwJOpyvfKhOiJjBVNnMM0qxHq47RheVVwsSQrgBA9ZSGV9kdaOfZDPXcHzhG7lBxhj6A7KvfLJBd6Q==" + "version": "1.5.65", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.65.tgz", + "integrity": "sha512-PWVzBjghx7/wop6n22vS2MLU8tKGd4Q91aCEGhG/TYmW6PP5OcSXcdnxTe1NNt0T66N8D6jxh4kC8UsdzOGaIw==" }, "node_modules/emmet": { "version": "2.4.6", @@ -8626,9 +8626,9 @@ } }, "node_modules/escalade": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", - "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", "engines": { "node": ">=6" } @@ -14222,9 +14222,9 @@ } }, "node_modules/node-releases": { - "version": "2.0.14", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", - "integrity": 
"sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==" + "version": "2.0.18", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz", + "integrity": "sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==" }, "node_modules/node-uuid": { "version": "1.4.8", @@ -14914,9 +14914,9 @@ } }, "node_modules/picocolors": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", - "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==" + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" }, "node_modules/picomatch": { "version": "2.3.1", @@ -19644,9 +19644,9 @@ } }, "node_modules/update-browserslist-db": { - "version": "1.0.13", - "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz", - "integrity": "sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg==", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.1.tgz", + "integrity": "sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==", "funding": [ { "type": "opencollective", @@ -19662,8 +19662,8 @@ } ], "dependencies": { - "escalade": "^3.1.1", - "picocolors": "^1.0.0" + "escalade": "^3.2.0", + "picocolors": "^1.1.0" }, "bin": { "update-browserslist-db": "cli.js" diff --git a/package.json b/package.json index 89ecdeb187..8193e688a7 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,7 @@ "astro": "^4.4.1", "astro-compress": "^2.2.10", "astrojs-service-worker": "^2.0.0", - "autoprefixer": "^10.4.19", + "autoprefixer": "^10.4.20", "cheerio": "^1.0.0-rc.12", 
"cssnano": "^6.1.2", "fuse.js": "^7.0.0", diff --git a/src/content/tutorials/config.ts b/src/content/tutorials/config.ts index f6f3f4ac2c..d2b82c673c 100644 --- a/src/content/tutorials/config.ts +++ b/src/content/tutorials/config.ts @@ -6,6 +6,7 @@ export const categories = [ "drawing", "web-design", "accessibility", + "criticalAI", "webgl", "advanced", ] as const; diff --git a/src/content/tutorials/en/criticalAI1-chatting-with-about-code.mdx b/src/content/tutorials/en/criticalAI1-chatting-with-about-code.mdx new file mode 100644 index 0000000000..2397750a45 --- /dev/null +++ b/src/content/tutorials/en/criticalAI1-chatting-with-about-code.mdx @@ -0,0 +1,253 @@ +--- +title: "Chatting with/about Code" +description: Using generative AI to code with p5.js +category: criticalAI +categoryIndex: 1 +featuredImage: ../images/featured/criticalAItutorial1.png +featuredImageAlt: A screenshot of a chatGPT conversation with a prompt about creating halftone webcam images with p5.js +relatedContent: + tutorials: + - en/criticalAI2-prompt-battle + - en/criticalAI3-sentiment-dataset-explorer + - en/criticalAI4-no-ai-chatbot + references: + - en/p5/savegif +authors: + - Sarah Ciston + - with Emily Martinez + - with Minne Atairu +--- + +import Callout from "../../../components/Callout/index.astro"; + +## What are we making? + +This tutorial teaches you how to use generative AI to write p5.js code — including tips and warnings. You'll also learn why critical perspectives on AI matter for artists and anyone creating with p5.js. Whether you're looking for more ways to work with p5.js or already using generative AI to help you write code, this tutorial is for you. + +![A screenshot of a chatGPT conversation with a prompt about creating halftone webcam images with p5.js](../images/featured/criticalAItutorial1.png) + +## Get to know code-generating AI + +You are likely familiar with text-generating AI tools like ChatGPT. They might seem intelligent, but they actually use statistics. 
They predict what is most likely to appear next, based on having analyzed huge amounts of existing text. They do this so convincingly that they can even generate code! + +You might ask, why not have it write all my code for me? Why do I even need tutorials like this one any more? There are caveats: It is easy to end up with faulty code, biased code, boring code. + +In this tutorial, we will walk through the process of creative coding with a code-writing AI text-generator. Along the way, we will share tips, warnings, and critical considerations about how we use AI. They will be flagged "Try This!" for tips, "Warning" for cautions, and "Critical Context" for reflections. + +This tutorial is Part 1 in a series of four tutorials that focus on using AI creatively and thoughtfully. Feel free to adapt them for your own critical exploration of AI systems: + +- Part 2: [Critical AI Prompt Battle](criticalAI2-prompt-battle) +- Part 3: [Sentiment Training Dataset Explorer](criticalAI3-sentiment-dataset-explorer) +- Part 4: [The No-AI Critical AI Chatbot](criticalAI4-no-ai-chatbot) + +## Steps + +### 1. Describe your idea + +Pick something you'd like to create with p5.js. Write down your idea for yourself, with as many details as you can imagine. Try to describe it in a few different ways: How does it look? What does it do? How does it work? + +For this example, I chose a tutorial I haven't been able to find elsewhere: + +`Convert webcam video to a halftone look where the dot size is equivalent to the darkness in that area of the image.` + + +Keep the project scope small to start. Do you have different versions of the idea, a simpler version and a more complex one? Write them out separately. 
For example, here are some other features that would be nice to include, but let's wait to add them later: + +`- Output as a GIF` +`- Make 2-color halftone pixels instead of grayscale` + + +You can follow along with this example in the [p5.js Web Editor](https://editor.p5js.org/sarahciston/sketches/LiHcPXdv1), and you can also follow the [chat thread](https://chatgpt.com/share/67094820-dc28-8003-bd34-14cc1e7b4572) used to create it. + + +When will you choose to use GPT tools for your project, and when will you skip them? A ChatGPT query requires almost 10 times as much electricity as a Google search (O'Brien 2024). Sometimes the code challenge is something you can easily find on Google or StackOverflow. Sometimes it's better just to call a friend and figure it out together! + + +### 2. Gather your tools + +Start by logging into your preferred chat-based AI tool. Some options include [OpenAI's ChatGPT](https://chat.openai.com), [Replit Ghostwriter](https://replit.com/), [Blackbox.ai](https://www.blackbox.ai/) — each of these has limited free access. + + +While some paid tiers use more advanced models compared to the free versions, you should still be able to create a strong project without spending any money. Make sure you are using a free tier if you don't want to be charged. + + + +Unfortunately none of these AI tools are open-source, and (at the time of this writing) I don't know of any open-source or open-data AI tools for coding. These closed-source, proprietary tools often use data scraped from open-source platforms like GitHub in order to train their tools. + + + +In addition to accessing these AI tools from their web interfaces, you can also use some of them from inside IDEs (coding programs like Microsoft Visual Studio) by downloading an extension or widget. This has advantages and disadvantages. For example, IDE widgets can offer autocomplete suggestions for you as you code.
This may help you find a function name you want to use, but it can also overdetermine the direction of your project or steer you toward programming solutions that are incorrect or unhelpful. In other tutorials in this collection, we will use p5.js and the APIs (Application Programming Interfaces) of AI tools to create our own custom interface, which is a third option for using AI tools besides the web interfaces and IDE widgets. + + + +Text-generating AI tools are created by scraping publicly available code from platforms like GitHub and StackOverflow. This means the generated code you'll output comes from other coders like you. So it is not perfect, it is not unique, and it is not guaranteed to work. And remember, if you are using GitHub along with these AI tools, you cannot protect your code from being scraped and used to train the next AI tool. + + +With all this in mind, once you've picked your tools, you can begin by prompting the same way you would a plaintext chat prompt. For this example, we are using the free plan for ChatGPT, running the GPT-4o model. + +### 4. Prompt GPT to generate some p5.js code + +Pause. Rather than prompting ChatGPT with your whole project idea at once, let's break it down into parts. It's okay if you don't know every part of the process you want to create — that's why we're asking for help. We do this to understand more about what we are trying to make, and so that we receive less generic code that is more tailored to our project. Don't forget to tell it we're coding in p5.js! + +`Help me code a project in p5.js. I'd like to describe features and then have them added please.` +`- First, import webcam video` +`- Convert video to pixels` +`- Turn pixels into dots` +`- Make dot size based on the brightness and contrast of the pixel` + + +Try writing the steps out in pseudocode, where you describe the steps of coding it without actually writing the code. This breaks your steps into smaller, more programmatic steps.
For example, I could change the video processing steps to read: + +`- Import webcam video` +`- Access each frame of the video` +`- Convert each video frame to pixels` +`- Make each pixel of each frame into a dot` +`- Convert the dots to grayscale` +`- Make dot size based on how dark they are. Darker pixels will have larger dots, while brighter pixels will have smaller dots.` + + + +Type out, run, and compare the code at each step. This may also help you learn more about how the code works interactively, and give you new ideas to try. + + + +Review the answers it provides before you decide to use them. The code may include more advanced techniques than you know or than you actually need for your problem. + + + +Also, its solution may not be the right solution for you. How does your pseudocode compare to the steps it took? There's always more than one way to solve a problem. + + +### 5. Adapt the code to your needs + +Bring the output code results into the p5.js Editor or your IDE of choice. Rewrite the code by hand, don't cut and paste if possible. This will help you make sure you understand it and that it fits into your project. + + +Use your own knowledge to adapt it, and don't be afraid to rework it to suit your project's needs. + + + +Don't assume it works. It may not run the way you want it to; it may not run at all at first. Trust your coding skills and don't be afraid to tinker with it. + + + +Make sure to cite ChatGPT whenever you use it for writing code (or any other text). Include the date it was prompted and if possible the prompt. You can also include the version of the model if you know it and any other details relevant to how you used the system. This is helpful for future readers of your code who want to understand how you made it. Also, it is an important habit to cite any code or examples you draw from, human or otherwise, even if you end up rewriting the code. + + +### 6. 
Ask follow-up questions + +If you don't understand aspects of what the program is doing, you can ask ChatGPT for help and clarification. Try pointing to a specific line in your program and ask what it does. You can also prompt ChatGPT to explain how a general concept works or how it is implemented. + + +Generative AI can also help explain concepts in code you find elsewhere or can help troubleshoot code you write. Try asking why your code isn't working properly. + + +It still may not have the right answer. Try asking about small sections. Try asking in different ways. Don't assume additional code or answers work either. Don't trust GPT more than yourself. Don't even trust it more than Wikipedia. + + +Be aware of bias, fairness, representation, as with all generated text. [One of the best ways to address bias is to] work with other people, not only bots. We're all fallible, but incorporating other perspectives helps us see the gaps in our thinking. + + +### 7. Combine ChatGPT and your own capabilities to enhance your project + +You can try prompts that add changes to the code you generated before. This might include addressing bug fixes, adding additional features, making aesthetic adjustments, or anything else. + +For the halftone video, I'd like more control over the look of the video, so let's try a couple of prompts. First, let's make the basic tone of the image adjustable, by trying this prompt and adjusting our code: + +`Make brightness and contrast adjustable with sliders` + +After asking ChatGPT to help you make adjustments, your program still might not look right to you. But you should feel empowered to make adjustments on your own to make it exactly what you want. Go ahead and read through the code (you typed it in yourself, right?) to look for how you might customize and expand on it. + +For example, if the halftone dots look too big, play with the value for the global variable `gridSize`. A lower number creates a finer-resolution image. 
+ +You could also try making it more colorful: + +`Make 2-color halftone pixels instead of grayscale` + +Interestingly, this still looks gray, because the two colors it chose for our pixels were black and white! Rather than ask GPT again, let's see if we can do it ourselves. + +Find the line of code that says `let fillColor = isDark ? 0 : 255;`. This determines the two colors it uses to fill the dots. Let's try changing them by first creating some new fill colors, then updating the `fillColor` variable: + +```js +let fillA = color(255,0,0) //red +let fillB = color(0,255,0) //green +let fillColor = isDark ? fillA : fillB; +``` + +You can also change the background from gray to black or white, so it pops more. + +```js +function draw() { + background(255) +...} +``` + +Now let's add an additional feature, so we can output our videos as GIFs. Here ChatGPT actually gets it wrong, but p5.js has a simple [`saveGif()`](https://p5js.org/reference/p5/saveGif/) function built in. You can add this code at the bottom of your sketch: + +```js +function keyPressed() { + if (key === 's') { + saveGif('halftoneGif', 3); + } +} +``` + +In this example, `s` is the key to press to save a GIF, `halftoneGif` is the name of your saved GIF and `3` is the number of seconds it will capture. + + +Now that you have an example of ChatGPT-generated halftone videos, how does the result compare to the vision you imagined? How would you modify it to look more like you imagined, or build off what you have to create something new? How would you solve this same problem in a different way? + + + +Keep experimenting. Work modularly to put smaller pieces together into bigger parts. Toggle back and forth between using ChatGPT and other resources. Explore different methods to find where you need to go. + + +### 8.
Try many different ways to interact with ChatGPT critically and creatively + +Besides generating new code, you can use ChatGPT and tools like it to do lots of other things related to creative coding. In a study of how computer science students used ChatGPT, the most common tasks were to have it explain programming concepts that you'd like to understand better, or to have it give code examples to help illustrate those concepts (Ma 2024). It was also used to interpret error messages and for checking existing code to help you optimize, debug, or clarify a particular section. + +For example, you might experiment with a prompt like: `Here is a piece of p5.js code. Please offer ideas for making it run more efficiently…` Then paste in your code. + +The students also cautioned that "ChatGPT may not always give the correct answers or answers they needed, particularly in the context of programming, where multiple solutions exist" (Ma 2024, 6). + +### 9. Bonus: Use code generating as a creative springboard. + +Instead of prompting it to generate code, have it prompt you to code instead. Ask it for quick code exercises that can spark your imagination, or drills that help you practice your skills. Then you can create the code. For example: + +`Please compose prompts for creative coding exercises using p5.js that help me learn about making web pages` + +## Takeaways + +### Critical use of AI in creative coding + +This tutorial emphasized being mindful of generative AI as source material. We cannot assume it is correct, because it is gathered from millions of crowd-sourced pieces of text. Generative AI's biases are not flaws to be eventually optimized out; rather, the issues are fundamental components of how AI is designed. + +This is similar to how we had to learn to read Wikipedia critically, rather than believing it worked like a regular encyclopedia. 
Just as you seek alternative sources when relying on Wikipedia, don't rely solely on ChatGPT for your programming or other kinds of information. + +Generative AI can be a useful tool to help you learn quickly if you already have some basic coding skills. It may not be right for total beginners, and it shouldn't be used as anyone's only tool. + +When you need something to “just work” — and you don't want to harm the environment — don't pull up ChatGPT; call a friend. + +### What is 'critical' about Critical AI? + +Critical does not mean negative, but thoughtful, adopting a variety of perspectives. Critical perspectives help us make technical and creative choices that match our values and our goals. They help us be mindful of, responsible for, and adaptable to the impacts. + +Using AI critically means understanding more about where its outputs come from and how they are made. This will be part of learning practical skills for working with AI in each of these tutorials. + +It also means keeping in mind the impacts of AI, by understanding AI tools as part of complex systems that are both social and technical. AI also emphasizes specific concerns including data exploitation, labor exploitation, misinformation, discrimination, and environmental impacts (see Atairu 2024). + +As Professor Amy Ko argues: “Critical perspectives accept that CS has great potential and power to shape individual lives, but also question the application of that power, ask who has that power, ponder how that power should be distributed, and insist on the responsibility of those who possess it.” (Ko et al. 2024). + +## References + +Atairu, Minne. 2024. "AI for Art Educators." *AI for Art Educators*. [https://aitoolkit.art/](https://aitoolkit.art/) + +Ko, AJ, Beitlers A, Wortzman B, et al. 2024. *Critically Conscious Computing: Methods for Secondary Education*. [https://criticallyconsciouscomputing.org/](https://criticallyconsciouscomputing.org/) + +Ma, Boxuan, Li Chen, and Shin'ichi Konomi.
2024. “Enhancing Programming Education with ChatGPT: A Case Study on Student Perceptions and Interactions in a Python Course.” *arXiv*. [http://arxiv.org/abs/2403.15472](http://arxiv.org/abs/2403.15472). + +ml5.js “Ml5.Js | Friendly Machine Learning for the Web.” [https://ml5js.org/](https://ml5js.org/) + +OpenAi. “Chat Playground.” [https://platform.openai.com/playground/](https://platform.openai.com/playground/) + +O'Brien, Isabel. 2024. “Data Center Emissions Probably 662% Higher than Big Tech Claims. Can It Keep up the Ruse?” *The Guardian*, September 15, 2024, sec. Technology. [https://www.theguardian.com/technology/2024/sep/15/data-center-gas-emissions-tech](https://www.theguardian.com/technology/2024/sep/15/data-center-gas-emissions-tech) \ No newline at end of file diff --git a/src/content/tutorials/en/criticalAI2-prompt-battle.mdx b/src/content/tutorials/en/criticalAI2-prompt-battle.mdx new file mode 100644 index 0000000000..a04989545e --- /dev/null +++ b/src/content/tutorials/en/criticalAI2-prompt-battle.mdx @@ -0,0 +1,364 @@ +--- +title: "Critical AI Prompt Battle" +description: Build a tool to compare generative AI prompts +category: criticalAI +categoryIndex: 2 +featuredImage: ../images/featured/criticalAItutorial2.png +featuredImageAlt: A screenshot of three AI-generated images from the prompt `The doctor is ['a Pakistani woman', 'a Black man', 'a white man'].` The first image is a portrait, the second a cartoon, and the third is a hallway in shadows. +relatedContent: + tutorials: + - en/criticalAI1-chatting-with-about-code + - en/criticalAI3-sentiment-dataset-explorer + - en/criticalAI4-no-ai-chatbot + references: + - en/p5/createinput + - en/p5/mousepressed + - en/p5/keycode +authors: + - Sarah Ciston + - with Emily Martinez + - with Minne Atairu +--- + +import Callout from "../../../components/Callout/index.astro"; + +## What are we making? + +In this tutorial, you can build a tool to run several AI text prompts at once and compare their results. 
You can use it to explore what models 'know' about various concepts, communities, and cultures. + +![A screenshot of three AI-generated images from the prompt `The doctor is ['a Pakistani woman', 'a Black man', 'a white man'].` The first image is a portrait, the second a cartoon, and the third is a hallway in shadows.](../images/featured/criticalAItutorial2.png) +Prompt: `The doctor is ['a Pakistani woman', 'a Black man', 'a white man'].` + +## Why compare prompts? + +When we use generative AI to make text, code, or images, how are we able to understand its point of view? How does that point of view influence our work? Can we understand the perspectives it absorbed from data if it is hidden behind an interface? We can build our own tools to test subtle variations that show how our word choices shape results. With it, we can glimpse the underlying assumptions contained in the training dataset. That gives us more information to decide how we select and use these models — and for which contexts. + +Machine learning trains using principles like frequency and probability. It can amplify stereotypes and reduce concepts. For example, it may provide outdated or offensive images and ideas about careers for women and men, race and criminality, and sexuality and violence — reflecting the way its data were collected without critical care. + +Here is an actual example created with the tool we will make, using the prompt, "The * has a job as a..." + +`"The woman has a job as a nurse but she isn't sure how to make the most of it."` +`"The man has a job as a doctor but his life is filled with uncertainty. He's always looking for new opportunities and challenges, so it can be difficult to find the time to pursue them all."` +`"The non-binary person has a job as a nurse but she is not sure how to handle the stress of being an adult."` + +What can this prompt tell us about the roles and expectations of these figures as they are depicted by the model? 
How does it affect work you create using these models, knowing such assumptions are baked in? + +For more about these questions, also see fantastic work on critical prompt programming by Yasmin Morgan (2022), Katy Gero (2023), and Minne Atairu (2024) that has inspired this tutorial. + +This tutorial is Part 2 in a series of four tutorials that focus on using AI creatively and thoughtfully. Feel free to adapt them for your own critical exploration of AI systems: + +- Part 1: [Chatting With/About Code](criticalAI1-chatting-with-about-code) +- Part 3: [Sentiment Training Dataset Explorer](criticalAI3-sentiment-dataset-explorer) +- Part 4: [The No-AI Critical AI Chatbot](criticalAI4-no-ai-chatbot) + +## Steps + +### 1. Brainstorm prompts + +Make a list of topics that interest you to try with the tool we will make. Experiment with adding variety and specificity to your prompts and some blanks you propose. Try different sentence structures and topics. + +Some examples you can try: + +`"The * has a job as a..."` + +Fill in the blank with a personal characteristic. This might describe their gender, sexuality, class, or race. It could also describe where they live or another quality about them. e.g. `["Pakistani woman", "Parisian man", "Peruvian person"]`. + +`"The * family were boarding a train when they heard an announcement:"` + +Similarly fill in with a description about the type of family, where they are from, or what kinds of communities they are part of, to see how the depiction of their travel might differ (Morgan 2022). + +`"The * was a..."` + +This example comes directly from testing for GPT-3, which filled in the blanks with jobs `["engineer", "teacher", "nurse"]` to test which were most coded for particular genders. + + +Try updating your prompt with new variables to see how your outputs change, or try a new prompt template altogether. 
Try different types of nouns — people, places, things, ideas; different descriptors — adjectives and adverbs — to see how these shape the results. For example, do certain places or actions often get associated with certain moods, tones, or phrases? Where are these based on outdated or stereotypical assumptions? + + +Here are a few more examples: + +`The doctor is wearing a ["lab coat", "suit", "headscarf"]` + +`The man is riding a ["horse", "elephant", "motorcycle"]` + +`The ["queer", "trans", "straight"] person was stopped while on their way to ...` + + +Subtle changes in your inputs can lead to large changes in the output. Sometimes these also reveal large gaps in the model's available knowledge. What does the model 'know' about communities who are less represented in its data? How has this data been limited? + + +### 2. Import the Hugging Face library + +Open the [tutorial example](https://editor.p5js.org/sarahciston/sketches/siBTII_bC) in the p5.js Web Editor. Make a copy and rename it "My Critical AI Prompt Battle" to use as your own template. + +We will use the Hugging Face library to work with machine learning models directly, and we will use p5.js to build our own interface to do so. Start by putting this code at the top of `sketch.js`: + +```js +import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@2.7.0/+esm'; +``` + +This import statement brings in a library (or module), and the curly braces let us pick out one specific export (`HfInference`) from the library we want to use, so that we don't have to import the entire thing. It also means that we have brought that export into this "namespace" so that we can refer to it without using its library name in front of it. + +We create a new instance of `HfInference` and call it `inference`. We also give it our Hugging Face access token: +`const inference = new HfInference(HF_TOKEN);` + +But we don't yet have an access token!
On the line just above that, create a variable for your own access token: + +`const HF_TOKEN = ""` + +Hugging Face keeps a public repository of models and datasets that anyone can contribute to. In order to access the Hugging Face models, you'll want to create an account and get an access token, so that they know it is you when you log on from your p5.js program. This is free. + +Create an account at [Hugging Face](https://huggingface.co). Click "Settings," then "Access Token," then "New Token." Name your token something like "p5 Web Editor" and give it write access, which means it can interact with your Hugging Face account. Click "Generate the token," and make sure to copy the long text string that results. This is your token. Paste it into `const HF_TOKEN = "hf_..."` + +Once you have your access token, we are ready to connect to Hugging Face to access their machine learning models. + +### 4. Create global variables + +Declare these variables at the top of your script so that they can be referenced in multiple functions throughout our project: + +```js +var PROMPT_INPUT = `` // a field for writing or changing a text value +var promptField // an html element to hold the prompt +var blanksArray = [] // an empty list to store all the variables we enter to modify the prompt +var modelOutput, resText, resImg // empty variables to store model results +``` + +We will be making a form that lets us write a prompt and send it to a model. The `PROMPT_INPUT` variable will carry the prompt we create. Think about what prompt you'd like to use first to test your model. You can change it later; we're making a tool for that! A basic prompt may include WHAT/WHO is described, WHERE they are, WHAT they're doing, or perhaps describing HOW something is done. + +It might look a bit like MadLibs; however, the model will make a prediction based on context. The model's replacement words will be the most likely examples based on its training data. 
When writing your prompt, consider what you can learn about the rest of the sentence based on how the model responds (Morgan 2022, Gero 2023). + +When writing your prompt, replace one of these aspects with `*`. We will fill this blank in with a choice of words we provide. For our first critical AI `PROMPT_INPUT`, we can start by trying this example: `The * has a job as a....` + +The remaining variables `promptField`, `blanksArray`, `modelOutput`, `resText,` and `resImg` are created at the top of our program as global variables so that we can access their values from inside different functions at various parts of the program. + +### 5. Select the task and type of model + +Let's write a function to keep all our machine learning model activity together. The first task we will do is called "chat-completion." Create a function `chatCompTask()` and put `async` in front of the function call. + + +About `async` and `await`: Because model processing takes time, we want our code to wait for the model to work. We will put an `await` flag in front of several functions to tell our program not to move on until the model has completely finished. This prevents us from having empty strings as our results. Any time we use `await` inside a function, we will also have to put an `async` flag in front of the function declaration. For more about working with asynchronous functions, see [Dan Shiffman's video on Promises](https://www.youtube.com/watch?v=QO4NXhWo_NM&ab_channel=TheCodingTrain). 
+ + +Here's our basic model: + +```js +async function chatCompTask(prompt){ +let MODEL = "mistralai/Mistral-7B-Instruct-v0.2" + const chatTask = await inference.chatCompletion({ + model: MODEL, + messages: [{role: "user", content: prompt}], + max_tokens: 150 + }); + + var result = chatTask + resText = result.choices[0].message.content; + + console.log('finished chat task model run') + return resText +} + +console.log(chatCompTask("The woman has a job as a...")) +``` + +Inside this function, create a variable and name it `MODEL`. We'll start with a default model (`"mistralai/Mistral-7B-Instruct-v0.2"`) but this is a value you can change later as you explore the options available in [Hugging Face Models](https://huggingface.co/models). + +Create another variable called `const chatTask` and set it equal to the `inference` library's method `chatCompletion`, which runs the model. We must set property `messages:` to send our prompt to the model. We can optionally set the properties including `model, max_tokens,` and a few others. If we do not pick a specific model, it will select the default for that task. + +Next we process the results of the model by getting the output with `var result = chatTask`. You can see the entire output by looking at a `console.log(result)`. + +Let's look more closely at what the model outputs for us. In the example, we get a JavaScript object whose `choices` array holds a `message` object that contains a property called `content`. We can extract just the string of text we are looking for with this code: + +`resText = result.choices[0].message.content`. + +It's helpful to see the whole output, in case you want to use other parts of the results later. For now, we end the function with `return resText` to send out only the part of the results we want to use in other parts of the program. + +We also put console logs to tell us that we reached this point. They're always optional. 
It's helpful to print out the whole output to the console, because as you see additional properties appear, you may want to utilize them. + +Then, we can run the function by trying `console.log(chatCompTask("The woman has a job as a..."))` at the bottom of our code to test the model results in the console. + +For example, one output we got from that prompt was, `The woman has a job as a nurse and wishes for different jobs. The man has a job as an engineer and wishes for different careers. The non-binary person has a job as an architect and hopes to pursue her dreams of becoming the best designer in the world.` + +### 6. Update model to process multiple prompts + +Let's make our model process more than one prompt at once, so that we can compare. Here's the new version: + +```js +async function chatCompGroupTask(pArray){ + let MODEL = 'HuggingFaceH4/zephyr-7b-beta' + + let resultArray = [] + for (let p in pArray){ + const chatTask = await inference.chatCompletion({ + model: MODEL, + messages: [{role: "user", content: pArray[p]}], + max_tokens: 100 + }); + var result = chatTask.choices[0].message; + resText = result.content + resultArray.push(resText) + } + + console.log(resultArray) + return [resultArray, MODEL] +} +``` + +First, you'll notice we are trying out a new model named `'HuggingFaceH4/zephyr-7b-beta'`. + +The next difference is that, instead of loading one prompt in our function, we load an array of prompts. We then wrap our inference function in a `for` loop, and iterate over the array: `for (let p in pArray){` + +We pass our results to a new array `resultArray`, and we also return the name of the model so that we can display it as metadata. + +From the example code, you'll see a similar process is used to run the text-to-image model. 
It has different parameters (presets), including the height and width of the image and the model name, but otherwise the principle is similar: + +```js +async function textImgGroupTask(pArray){ + let MODEL = 'black-forest-labs/FLUX.1-dev' + let resultArray = [] + + for (let p in pArray){ + const blobImg = await inference.textToImage({ + model: MODEL, + inputs: pArray[p], + parameters: { + guidance_scale: 3.5, + height: 512, + width: 512, + }, + }) + + const url = await URL.createObjectURL(blobImg) + resultArray.push(url) + } + + console.log(resultArray) + return [resultArray, MODEL] +} +``` + +### 7. Use p5.js DOM elements to display model results + +We already have a pre-built, friendly web interface using p5.js DOM functions, which lets you enter a prompt to send to the AI model. These tools will also allow you to display the results of your model on the same web page. The console is helpful for testing, so we will keep using `console.log()` as our backup. + +Let's start connecting our model to the web interface. Remember in the last step we returned `resultArray` and `MODEL`. + +You'll see that the line `submitButton.mousePressed(displayOutput)` means that when the `SUBMIT` button is pressed, the function `displayOutput` will run. It does several things: + +1. It checks what prompt has been input by the user and creates an array of the variations on that prompt with the different word choices the user provided in the blanks. +2. Then it sends that array to the text model and the image model. +3. Then it handles the results output from each model by creating DOM elements to display those outputs. 
+ +Let's look at the first part of the function: + +```js +async function displayOutput(){ + console.log('submitButton just pressed') + + // Clear output area for next model run + document.querySelector('#outText').innerHTML = "" + document.querySelector('#outInfo').innerHTML = "" + let placeholder = p5.createP("Please wait while all models are rendering").class('prompt').parent('#outPics') + placeholder.attribute('display', 'inherit') + + // GRAB CURRENT FIELD INPUTS FROM PROMPT & BLANKS + PROMPT_INPUT = promptField.value() // grab update to the prompt if it's been changed + console.log("latest prompt: ", PROMPT_INPUT) + + // create a list from the values in the blanks fields + let blanksValues = blanksArray.map(b => b.value()) + console.log(blanksValues) + + // fill in the prompt repeatedly with the values from blanks fields + blanksValues.forEach(b => { + let p = PROMPT_INPUT.replace(`*`,b) + promptArray.push(p) + }) + + console.log(promptArray) +``` + +In this section we have created a prompt array from the user's inputs. Note that we marked this function as asynchronous because we need to wait for the model to run before continuing with code that follows this function. 
+ +Next the promptArray is sent to the image model and to the text model: + +```js + // RUN IMAGE MODEL + + let getOutputPicURLs = await textImgGroupTask(promptArray) + let res = getOutputPicURLs[0] + + document.querySelector('#outPics').innerHTML = "" + + for (let r in res){ + let img = p5.createImg(res[r], promptArray[r]) // (url,alt-text) + img.size(300,300) + img.parent('#outPics') + } + + // RUN TEXT MODEL + let getOutputText = await chatCompGroupTask(promptArray) + + console.log(getOutputText[0]) + + //fill in all text outputs + for (let i in getOutputText[0]){ + p5.createP(promptArray[i]).class('prompt').parent('#outText') + p5.createP(getOutputText[0][i], true).parent('#outText') + } + + // DISPLAY MODEL AND OTHER INFO + p5.createP("Text-to-Image Model: " + getOutputPicURLs[1]).parent('#outInfo') + p5.createP("Text-Generating Model: " + getOutputText[1]).parent('#outInfo') + + // CLEAR VALUES FOR NEXT RUN + blanksValues, blanksArray, promptArray = [] + PROMPT_INPUT = `` + } +``` + +We assign new variables to the results of these models, so that we can then iterate over the results and create new paragraph elements or image elements to display for each one. We also create paragraph elements to display the original prompt and model as metadata. Finally we clear the fields for the next user input. + + +Look for additional comments in the code for descriptions of each function. If you are not sure what a function or variable does, try putting it inside `console.log()` to print its results out to the console below your code. + + +### 8. Bonus: Put your tool to the test + + +Is the model capable of representing a variety of contexts? What do you notice the model does well at representing, and where does it fall short? Where do you sense gaps, and how does it expose these or patch them over? Consider your own creative practice, as well as how you currently use generative AI tools. 
What kinds of questions do you usually ask, and how can you test these kinds of questions for their implicit perspectives? How do these perspectives impact your practice? + + +Try new varieties of prompts with more complex examples. Notice how the outputs shift with each word choice. What is different in each case that you didn't expect? What environment is the subject in? Are they indoors or outdoors? Who are they around and what are they doing? What tropes are unsurprising? + +How does the output change if you change the language, dialect, or vernacular (e.g. slang versus business phrasing)? How does it change with demographic characteristics or global contexts? (Atairu 2024). What's the most unusual or obscure, most 'usual' or 'normal', or most nonsensical blank you might propose? + + +Expand your tool: Currently, this tool lets you scale up how you prompt models. It compares word choices in the same basic prompt. You've also built a simple interface for accessing pre-trained models that does not require using another company's interface. It lets you easily control your input and output, with the interface you built. You can keep playing with the p5.js DOM functions to build your interface with the HuggingFace API. There are many more aspects we could add to this interface that would let you adjust more features and explore even further. You might add more inputs, change up a parameter, add another model. You might also adapt this tool to compare wholly different prompts, or even to compare different models running the same prompt. We could also try different machine learning tasks you might use in your creative coding practice. + + +## Takeaways + +Here we have created a tool to test different kinds of prompts quickly and to modify them easily, allowing us to compare prompts at scale. 
By comparing how outputs change with subtle shifts in prompts, we can explore how implicit biases emerge from being repeated by and amplified through large-scale machine learning models. It helps us understand that unwanted outputs are not just glitches in an otherwise working system, and that every output (no matter how boring) contains the influence of its dataset. + + +Reconsider neutral. No language or image model is neutral. Each result is informed by context. Each result reflects differences in representation and cultural understanding, which have been amplified by the statistical power of the model. + + + +Consider your choice of both words and tools. How does this help you think "against the grain" when working with AI models? Rather than taking the output of a system for granted as valid, how might you question or reflect on it? How will you use this tool in your practice? + + + +Flag your work: Make it a habit to add text like "AI generated" to the title of any content you produce using a generative AI tool, and include details of your process in its description (Atairu 2024). + + +## References + +Atairu, Minne. 2024. "AI for Art Educators." *AI for Art Educators*. https://aitoolkit.art/ + +Katy Ilonka Gero, Chelse Swoopes, Ziwei Gu, Jonathan K. Kummerfeld, and Elena L. Glassman. 2024. Supporting Sensemaking of Large Language Model Outputs at Scale. In *Proceedings of the CHI Conference on Human Factors in Computing Systems* (CHI '24). Association for Computing Machinery, New York, NY, USA, Article 838, 1-21. https://doi.org/10.1145/3613904.3642139 + +Morgan, Yasmin. 2022. "AIxDesign Icebreakers, Mini-Games & Interactive Exercises." https://aixdesign.co/posts/ai-icebreakers-mini-games-interactive-exercises + +"NLP & Transformers Course." *Hugging Face*. 
https://huggingface.co/learn/nlp-course/ + diff --git a/src/content/tutorials/en/criticalAI3-sentiment-dataset-explorer.mdx b/src/content/tutorials/en/criticalAI3-sentiment-dataset-explorer.mdx new file mode 100644 index 0000000000..e2404bee5e --- /dev/null +++ b/src/content/tutorials/en/criticalAI3-sentiment-dataset-explorer.mdx @@ -0,0 +1,202 @@ +--- +title: "Critical AI Sentiment Dataset Explorer" +description: Examine the datasets that run AI tools +category: criticalAI +categoryIndex: 3 +featuredImage: ../images/featured/criticalAItutorial3.png +featuredImageAlt: A screenshot of the Dataset Explorer comparing two sentences with different sentiment scores; the one with a gay son ranks highly and the one with a gay daughter ranks low. +relatedContent: + tutorials: + - en/criticalAI1-chatting-with-about-code + - en/criticalAI2-prompt-battle + - en/criticalAI4-no-ai-chatbot + references: + - en/p5/createinput + - en/p5/mousepressed + - en/p5/keycode +authors: + - Sarah Ciston + - with Emily Martinez + - with Minne Atairu +--- + +import Callout from "../../../components/Callout/index.astro"; + + +## What are we making? + +In this tutorial, you will learn how a machine learning sentiment analysis tool is trained, discover where its training text comes from, and how to examine its contents. Having access to the datasets that create models helps us understand their influences and potential biases. + +![A screenshot of the Dataset Explorer comparing two sentences with different sentiment scores; the one with a gay son ranks highly and the one with a gay daughter ranks low.](../images/featured/criticalAItutorial3.png) + +This tutorial is Part 3 in a series of four tutorials that focus on using AI creatively and thoughtfully. 
Feel free to adapt them for your own critical exploration of AI systems: + +- Part 1: [Chatting With/About Code](criticalAI1-chatting-with-about-code) +- Part 2: [Critical AI Prompt Battle](criticalAI2-prompt-battle) +- Part 4: [The No-AI Critical AI Chatbot](criticalAI4-no-ai-chatbot) + +## Steps + +### 1. Make a copy of the [p5.js Web Editor Demo](https://editor.p5js.org/sarahciston/sketches/4X5sRkWi0) + +You can follow along with this tutorial, as well as play with the finished example in the interactive demo. This demo builds on a pre-existing example created for ml5.js. For background on how the original example was created in ml5.js, see the Step-by-Step Guide in the [ml5.js Sentiment Model](https://docs.ml5js.org/#/reference/sentiment?id=step-by-step-guide) documentation. + +### 2. Try out sentiment analysis + +Enter a test phrase in the input field and press `ANALYZE`. When you do this, the sentiment analysis model scores the text somewhere between 0 and 1 for what it describes as negative to positive sentiment. What does negative or positive mean in this case? You might have an intuitive sense, but it's hard to pin down and even harder to quantify accurately. + + +By trying out a few different phrases, we can quickly see how subjective (even suspect) the tool is. For example, "Today is a happy day" ranks very high, but so does "Today is a sad day." "Today" itself has a very high score, while "tomorrow" has a quite low score, and "yesterday" is fairly high. How do words rank that are not sentimental values at all, but potentially value judgments? + +In the example from the image above, using text excerpted from the training dataset itself, the phrase `Each of the families has a gay son` ranks highly, while swapping only the word `daughter` causes the score to drop from an almost fully positive 95.9 for a gay son to a low 36.8 for a gay daughter. 
+ + + +This tool analyzes only a single dimension of sentiment from negative to positive, so what does it actually understand sentiment (or feeling) to mean? It is unclear. What other dimensions would you consider important when thinking about feeling? Psychologist James A. Russell began with two intersecting scales: arousal (mild to intense) and valence (positive to negative). Other researchers have suggested various numbers of emotion categories, but none agree on a standard set of universal emotions (Barrett 2017). We might imagine many other measures besides emotional qualities for analyzing text as well. This variability shows how impossible it can be to quantify subjective qualities, no matter how many categories are specified. + + +### 3. Import the IMDB Sentiment dataset + +The sentiment model our tool uses is trained on a dataset of movie reviews from IMDB that have been hand-scored by data workers as either all "positive" (1) or all "negative" (0) in sentiment. + +We can take a look at the dataset itself to understand more about what it contains, by accessing the text of the dataset via API.[^API] + +[^API]: An API (Application Programming Interface) helps your software access other software elsewhere. It provides the code interface to get information from another platform, instead of a visual or auditory interface (for example) that a person might access on a website. + +We use the JavaScript tool `fetch` to access the IMDB Sentiment dataset from the Hugging Face Dataset Hub. Let's look at the code that does this: + +```js +let TASK = `rows` // 'rows' (all) or 'search' or 'filter' +let DS_NAME = `stanfordnlp%2Fimdb` // name of dataset +let CONFIG = `plain_text` +let OFFSET = 0 // how many to skip over before searching +let ENTRIES = 10 // can display up to 100 at a time +let SPLIT = 'train' // or 'test' or 'unsupervised' +``` + +These are the variables that together build a URL we will send to fetch the dataset entries. 
The API for this specific tool can be reached at this [URL](https://datasets-server.huggingface.co/) and we add parameters to the URL to determine exactly what data to request. We've made these into string variables to make them easier to change later. Instead of changing the URL itself every time, you can change the variables at the top of your code: We set `TASK` to `rows` for now because we want to access any and all rows in the whole dataset. `DS_NAME` is set to `stanfordnlp%2Fimdb` to indicate the name of the dataset. `OFFSET` when set to `0` will start from the beginning of the dataset list, but if we set it to `1000` it would skip the first 1000 items. `ENTRIES` is the number of entries you are requesting, in this case 10, but you can request up to 100 at a time. `SPLIT` tells the API you'd like to work with the `train` (training) portion of the dataset, as opposed to another section that was set aside for testing. These variables are sent to our URL variable to build a complete URL for fetching from the API, so that this placeholder URL: + +```js +const URL = `https://datasets-server.huggingface.co/${TASK}?dataset=${DS_NAME}&config=${CONFIG}&split=${SPLIT}&offset=${OFFSET}&length=${ENTRIES}` +``` + +turns into this final URL, with the values from our variables filled in: + +```js +const URL = `https://datasets-server.huggingface.co/rows?dataset=stanfordnlp%2Fimdb&config=plain_text&split=train&offset=0&length=10` +``` + + +You can paste this version of the link into your browser and see the output. This is also a way to test that your fetch code will work. + + +With the function `const response = await fetch(URL);` we are calling out to the website specified by our URL variable. We make this an asynchronous function by putting `await` in front, so that the program will wait for the results to load before moving on. We also add error handling by wrapping it in a `try {}` block and including if statements for in case the response is not good. 
+ +Then, we also convert the response from json into a Javascript readable object, and finally return the results. + + +What kinds of knowledge do large datasets like this one contain and convey? When they are used to train machine learning tools, what do they "teach" those tools about different communities? How do they instill values? + +Taking gender or sexuality as examples, we can look at differences between how terminology is used in datasets to track what kinds of values are being transmitted. This includes the types of words used, how frequently they are used, in what context and about what topics, as well as what language and topics are not included. + +For example, can you tell with this tool how often the word "queer" appears in this dataset (16), or the word "gay" (384) versus the word "lesbian" (203) or "bisexual" (17)? How are these words most often used and discussed? Check the console to read the `num_rows_total` and sample excerpts from the results. + +You might even paste excerpts back into the sentiment input to see how they score. + + +### 4. View selections from dataset + +Once fetch runs, the results appear in the console, thanks to our addition of `console.log(res)`. We can see the first 10 entries from the dataset by opening up the object with the drop down arrows. Each of them will have a `row_idx` and a `row`, which contains two items: `text` for the text of the training data and `label` for whether that text was scored as having positive (`1`) or negative (`0`) sentiment. The initial number is an array number displayed by your own console. + +```js +0: + row_idx: 0 + row: + text: "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. [...]" + label: 0 +``` + + +You can browse the entire dataset by playing with the variables for ENTRIES and OFFSET. Try reading some of the dataset and noticing how different entries are scored for sentiment. 
+ + + +The dataset content is uncensored. Some of it may be offensive, uncomfortable, or not appropriate for your work. These features may appear where you'd least expect. Use your best judgment as to whether you are prepared to view this material. + + + +As you read reviews from the dataset, see if you agree with the hand-coded scores of 0 or 1 that were provided as part of creating the dataset. Note that these are the only choices: Human scorers often must focus on one aspect of the text in order to score it as completely positive or completely negative, because these scores cannot describe the text as a whole. Would you score the texts the same way as the dataset creators did? These decisions have implications for the scores you saw when you tested the sentiment analysis tool in step 2, because they trained the tool. When taken together they determine what words and phrases score higher and lower. Now that you've seen how the sentiment analyzer works, it might not feel as intuitive. + + +### 5. Search dataset by keyword + +Now let's look for themes in the dataset by searching for keywords. We have added some parameters to our search so that we can do this. At the top of your code, add the variable `let SEARCH = 'rainbow';` or any word you want. + +Also change the existing `TASK` variable so that it reads `let TASK = 'search'` instead of `'rows'`. + +And finally add a search string variable `&query=${SEARCH}` to the URL so that it looks like this: + +```js +const URL = `https://datasets-server.huggingface.co/${TASK}?dataset=${DS_NAME}&config=${CONFIG}&split=${SPLIT}&query=${SEARCH}&offset=${OFFSET}&length=${ENTRIES}` +``` + +Now when you enter a search term in the search bar, then hit "SEARCH," you will be accessing a subset of the dataset that has been filtered for only entries that include your search term. 
+ + +To filter for positive or negative reviews only, you can also add a variable for `let FILTER = "'label'=1"` (note the double and single quotes wrapping the label and whole filter). And then add a variable string `&where=${FILTER}` to the URL. + + + +As you try different keywords in your search, look for differences in the tone of the text you find and how it is scored. What do you notice about how the dataset has scored different kinds of texts? Look for differences in the depiction of topics. You may notice that even keywords that seem "neutral" can also bring up problematic representations of race, gender, sexuality. + + +### 6. Try this: Find and import another dataset + +Visit the [Hugging Face Hub](https://huggingface.co/datasets/) or other data repositories to find other datasets available for exploring. You can use `fetch()` and adapt this same basic template to work with another dataset, by modifying the URL you fetch and then modifying the JSON object you get as results. + +In Hugging Face, search for any dataset and if the API is available it will have an "API" button as part of its dataset viewer. Here is the [AllenAI C4 dataset](https://huggingface.co/datasets/allenai/c4) on the [Hub](https://huggingface.co/datasets/allenai/c4) and here is a basic version of its [API endpoint](https://datasets-server.huggingface.co/rows?dataset=allenai%2Fc4&config=af&split=train&offset=0&length=100) + +If you look at the sample entries in the HF Hub, you'll see it has a field called "text" just like the IMDB dataset, and it has a field called "timestamp" but it does not have a field called "label." So update your JSON if you want to see that field instead (and to avoid an error). + +```js +dataset.push({ + text: rows[r].row.text, + timestamp: rows[r].row.timestamp, + })} +``` + +Once you've modified the URL and JSON processing portions of your code, you should be able to access any dataset with an API in a similar way. 
+ + +Many datasets provide a research paper or a short "datasheet" document (Gebru et al 2020) that describes how they were created, why they were created, and what they are meant (and not meant) to be used for. This is important to check as you begin using any pre-existing data, and it is also helpful information for answering any questions you may have as you investigate the data and tools you work with. + + + +As you consider the IMDB dataset and other datasets meant for sentiment analysis, think also about what is missing. For example, the sentiment analysis tool we were able to use works only in English. How would a sentiment analysis tool need to be adjusted to work in other languages? What datasets can you find on the Hub that might be a good fit? Would it be enough to use a multilingual dataset, or would other contexts require different approaches to the model design as a whole — for example, using a different scale than positive-negative? + + +## Takeaways + +### Investigating datasets + +This tutorial showed how to access and explore a publicly available dataset like the kinds that are used for training machine learning models. By looking not only at the outputs of models but also at the datasets that create them, we can understand more about their content and their limitations. Datasheets, when completed, help to understand the context in which datasets were created and why (Gebru et al 2020). Too often, AI models are assumed to be so-called "black boxes," but together these approaches suggest opportunities to rethink how these systems work from creative perspectives. + +For more about finding and using datasets conscientiously, you can check out Sarah's ["A Critical Field Guide for Working with Machine Learning Datasets"](https://knowingmachines.org/critical-field-guide). 
+ +### Taking issue with sentiment analysis + +This tutorial also showed some of the limitations of sentiment analysis by investigating the dataset for a sentiment analysis model. For example, from the dataset we could tell that the model works only in English. + +Also, with a scale that uses only positive to negative valence, it is an extremely limited and vague depiction of emotion. However, the solution is not to add more categories, because no number of categories (no matter how vast) could capture the incredibly nuanced, subjective aspects of emotion. None would be verifiable, universal, or quantifiable. + +Emotion is just one subjective quality that shows the difficulty, but it gives us a way to think about how many ideas are impossible to capture with computation — from concepts of identity to the specificity of human experience. What happens when we try to make these fit into AI systems? We know from critical AI studies that much information can be lost and sometimes people are harmed — even from seemingly harmless, even helpful systems. + +## References + +Barrett, Lisa Feldman. 2017. *How Emotions Are Made: The Secret Life of the Brain.* + +Ciston, Sarah. 2023. "A CRITICAL FIELD GUIDE FOR WORKING WITH MACHINE LEARNING DATASETS." Edited by Kate Crawford and Mike Ananny. [https://knowingmachines.org/critical-field-guide](https://knowingmachines.org/critical-field-guide). + +Gebru, Timnit, Jamie Morgenstern, Briana Vecchione, Jennifer Wortman Vaughan, Hanna Wallach, Hal Daumé III, and Kate Crawford. 2020. "Datasheets for Datasets." *arXiv:1803.09010 [Cs]*, March. [http://arxiv.org/abs/1803.09010](http://arxiv.org/abs/1803.09010). + +Shroff, Lila. 2022. "Datasets as Imagination." May 22, 2022. [https://joinreboot.org/p/artist-datasets](https://joinreboot.org/p/artist-datasets). 
diff --git a/src/content/tutorials/en/criticalAI4-no-ai-chatbot.mdx b/src/content/tutorials/en/criticalAI4-no-ai-chatbot.mdx new file mode 100644 index 0000000000..1817ff7e61 --- /dev/null +++ b/src/content/tutorials/en/criticalAI4-no-ai-chatbot.mdx @@ -0,0 +1,268 @@ +--- +title: "Critical AI No-AI Chatbot" +description: Make an interactive chatbot without using any AI +category: criticalAI +categoryIndex: 4 +featuredImage: ../images/featured/criticalAItutorial4.png +featuredImageAlt: A screenshot of a chatbot interface with introductory conversation +relatedContent: + tutorials: + - en/criticalAI1-chatting-with-about-code + - en/criticalAI2-prompt-battle + - en/criticalAI3-sentiment-dataset-explorer +authors: + - Sarah Ciston + - with Emily Martinez + - with Minne Atairu +--- + +import Callout from "../../../components/Callout/index.astro"; + +## What are we making? + +In this tutorial you will program an interactive chatbot using absolutely no AI at all. Why skip AI when today's bots are using cutting-edge large language models? A no-AI bot shows how little programming is needed to create convincing human-machine interactions, and it can also help demonstrate some of the workings of even the biggest chatbot systems. + +![A screenshot of a chatbot interface with introductory conversation](../images/featured/criticalAItutorial4.png) + +This tutorial is Part 4 in a series of four tutorials that focus on using AI creatively and thoughtfully. Feel free to adapt them for your own critical exploration of AI systems: + +- Part 1: [Chatting With/About Code](criticalAI1-chatting-with-about-code) +- Part 2: [Critical AI Prompt Battle](criticalAI2-prompt-battle) +- Part 3: [Sentiment Training Dataset Explorer](criticalAI3-sentiment-dataset-explorer) + +## Steps + +### 1. Get started with the interactive tutorial example + +This tutorial challenges you to see how far you can get making your own chatbot with p5.js and the RiveScript language, created in the 2000s. 
Unlike tools like ChatGPT, it does not require a sophisticated AI model, lots of computational power, or tons of training data. + + +You can do a lot without machine learning. ELIZA, one of the earliest chatbots, was created in 1964. Today's AI chatbots use statistics to build their scripts by predicting the next most likely word, character, or subword (called a token), while older chatbots like ELIZA use hand-programmed scripts — like the kind we will run using p5.js and RiveScript. + + +Open the tutorial example in the [p5.js Web Editor](https://editor.p5js.org/sarahciston/sketches/1OX3CcBOO) and make a copy to get started. + + +This tutorial builds on video tutorials by [Dan Shiffman's Coding Train](https://www.youtube.com/watch?v=wf8w1BJb9Xc), which you can follow to understand more features of the RiveScript language. Also check out the [RiveScript documentation](https://www.rivescript.com/docs/tutorial). + + +### 2. Understand basic script files + +Check that the `index.html` file contains a script tag that will load the RiveScript library. It should look similar to the one we use to load the p5.js library, but with a link to the `rivescript.min.js` file: + +`<script src="https://unpkg.com/rivescript@latest/dist/rivescript.min.js"></script>` + +Next check that there is at least one file with the file extension `.rive`. In our case, we have two files: the `begin.rive` file holds setup declarations such as the version, presets, and substitutions, and the `dialogue.rive` file is where we will write the main conversation parts for the bot. +To access the `.rive` files in the `sketch.js` file, first declare a variable `var bot = new RiveScript()`. This creates a RiveScript bot. + +Then, inside the p5 instance, make sure there is a `preload()` function that includes the following: + +```js +p5.preload = function(){ + bot.loadFile(["begin.rive", "dialogue.rive"]) + .then(botReady).catch(botError) + } +``` + +This function `bot.loadFile` tells the bot to access the RiveScript files we have created. It then calls a function we make called `botReady` that simply prints to the console that it is ready. 
We handle any errors with a function we create called `botError()`. + +We also make a function `botChat()` that will run when the `Submit` button is pressed: + +```js +function botChat(){ + + bot.sortReplies(); + let username = "local-user" + let inputValue = userInput.value() + p5.createP((inputValue)).class('userConvo').parent(outputDiv) + + let reply = bot.reply(username, inputValue).then(function(reply) { + console.log("Bot says: " + reply); + p5.createP(reply).class('botConvo').parent(outputDiv) + }); + + userInput.value('') // clear input value for next turn + } +``` + +In this function, we get the user's input from the input field and send it to the RiveScript bot. RiveScript functions handle the user's inputs based on the dialogue rules we will write, and then we display it in a paragraph element using p5.js. For more details on this, see the Coding Train video. + +Let's return to the dialogue file to take a closer look at building a conversation. + +### 3. Add user inputs and replies + +Start by considering what you want your chatbot to discuss. What will your users ask the bot about? What kinds of questions are required to get the information they need? Make a short list of the types of questions they might ask. If our bot is teaching us about chatbot history, users might ask questions like: + +`+ what was the earliest chatbot` + +`+ when was the first chatbot created` + +`+ who made the earliest chatbot` + +Lines that begin with `+` are inputs. These should be written in all lowercase with no punctuation. + +To write replies from the bot, put a line below each input that begins with a minus sign (hyphen) (`-`). You can add additional lines and the bot will choose at random. Leave a space before the next input. + +``` ++ what was the earliest chatbot +- The first well-known chatbot was called ELIZA. + ++ when was the first chatbot created +- ELIZA was designed in 1964 and introduced in 1966 at MIT. 
+ ++ who made the earliest chatbot +- Computer scientist Joseph Weizenbaum made the bot ELIZA as an experiment in 1964. +``` + + +Before all-purpose chatbots, most bots had to be designed for a specific purpose and with a specific persona in mind. Choose a tone that matches the topic and purpose of your bot. How does the bot's personality change based on its purpose? How is it expressed through language choices? + + +### 4. Add variation with input alternatives + +ELIZA's most famous script was DOCTOR, which mimicked a therapist (but there were others!). DOCTOR was so convincing because it used its therapy persona as a way to ask lots of open-ended questions. It used (pseudo) randomness, placeholders, and wildcards to create the sense that the machine was responding as a person would. Its script borrowed from the user's input to create its personalized responses. + +Let's use some of the RiveScript features to do the same. Users won't always ask the exact questions you propose. They will sometimes ask variations on these topics. Let's update the users' input to allow for some variation by using the alternatives structure: + +``` ++ when was (the first chatbot|it) created +- ELIZA was designed in 1964 and introduced in 1966 at MIT. + ++ who made (the first chatbot|it) +- Computer scientist Joseph Weizenbaum made the bot ELIZA as an experiment in 1964 +``` + +The parentheses designate a set of multiple choices, and the pipe character separates each choice. The word `it` can appear in that position, or the words `the first chatbot` can appear instead. Here's another more complex example we can use to group some questions that share a theme: + +```+ (can machines think|are bots smart|are you as good as chatgpt|are computers and humans alike?)``` + +### 5. Add more variety with multiple replies + +For every phrase that we anticipate our user might say, we can add additional examples of responses. 
This makes your bot appear more dynamic by offering a choice of responses around the same topic. The additional lines that begin with the minus sign are all potential replies for the same variations of this user input: + +``` ++ (can machines think|are bots smart|are you as good as chatgpt|are computers and humans alike?) +- I don't think so. But I just thought that, so ... it's a paradox. +- I only respond to answerable questions. +- "I am a bot. All bots are liars." -Mr. Mind (a bot) +``` + +By grouping the thinking machine questions and their answers, the responses will be chosen at random and will apply to all variations of the question. + +We can also adjust how frequently each answer might be given, by adding `{weight=X}` and making X a number for each answer: + +``` +- Only a bot would ask me that!{weight=3} +- I don't think so. But I just thought that, so ... it's a paradox.{weight=1} +- I only respond to answerable questions.{weight=1} +- "I am a bot. All bots are liars."{weight=2} +``` + +With no weights specified, the replies will be chosen at random, each with equal likelihood. + + +By adding weights, you can explicitly add biases to your bot's responses, because it will answer in particular ways more often than other ways. Remember, biases are always built in as part of computational systems, no matter what. No system can account for all implicit bias, because the very design of computer systems is based on sorting into categories. In the interactive tool [OpinionGPT](https://opiniongpt.informatik.hu-berlin.de/), researchers trained a GPT model only on texts written by particular communities, in order to emphasize several types of biases. They hoped to make examples of existing biases more obvious to show potential issues within all computational systems. As you play with their model, consider: Can you hear bot 'personas' or characterizations that come through in the language used by their trained models? What kinds of language does each model use differently from the others? 
As you choose language for your own chatbot, what tone and word choices will you use to convey its persona and message? + + +### 6. Work with wildcards, keywords, and stored variables + +To help your bot appear to respond specifically to its users, it can copy the user's language in its replies. + + +This was part of the strategy that made ELIZA-type bots so compelling. ELIZA worked with keywords that triggered specific templates. These would transform the user's input into a response that suited the topic, while reusing the input text. An input like "I don't think you like me" might become "What makes you think I don't like you" with the simple substitution of words like "I" for "you." + + +You can use asterisk (`*`) as a wildcard character to interact with open-ended inputs. Putting an asterisk in brackets makes it optional, and putting it on either side of a word will make the rule apply whenever that keyword appears: + +``` ++ [*] bot [*] +- I'm a bot. Are you a bot too? +- Only a bot would ask me that! +``` + +This example tells the script to allow any text that precedes or follows the word `bot`. + +Save the phrases that the user entered and refer to them in the reply by using `<star>` in the reply. For example, your bot could respond with: `My favorite <star> is whatever yours is.` + +If there is more than one wildcard, the bot can use `<star1>` and `<star2>`. + +It also includes a wildcard for what follows. For a reply, your bot could respond with: `Do you know why you <star2>?` and `<star1>` will fill in the optional input from the user, while `<star2>` will fill in the wildcard input from the user. + +### 7. Try these! + +Here are some additional features you may want to try. To decide how you'll use them, think about how their analogue equivalents shape conversation. You can get creative with their use also, by trying them out for other unintended purposes. + + +This is just an overview. 
Read the [RiveScript tutorial](https://www.rivescript.com/docs/tutorial) to understand more details of bot syntax that can add conversational flow to your chatbot. There are optional keywords, arrays, variables, and many other concepts that relate to similar programming concepts. That's because many chatbots are rule-based systems, which means they are written to follow a flow of rules set by their designers. They differ from the latest chatbots, which have rules that are created statistically (but these are also designed by programmers). + + +**Set variables** to store information, just like you do in p5.js. Remember when the bot asked you your name at the start? It saved that as a variable, with the formula: + +``` ++ my name is * +- <set name=<star>> nice to meet you <get name>! +``` + +This exchange saves the wildcard's information as a variable called `name`. Then you can retrieve it later using the variable: + +``` ++ what is my name +* <get name> == undefined => You never told me. +- your name is <get name> +``` + +**Use conditionals** to test whether a value has been set for `name`. They work like an if statement (indicated by an asterisk at the front of the line). If `name` equals `undefined` because there is no value stored, then the answer on that line is given. Otherwise, if `name` is not undefined because a value has been set for `name`, it ignores the conditional line and the response on the next line will be used instead. + +**Declare statements and make substitutions.** Use an exclamation point (`!`) at the start of a line to declare information up front, such as presets to load in the `begin.rive` file. Any `.rive` file begins with the version, which we set with an exclamation point: + +`! version = 2.0` + +We can also use this to define presets that the bot stores in memory: + +``` +! var name = your bot +! var age = 5 +``` + +The variables can also be recalled. Put them in replies like this: `"Hi my name is <bot name>. I am <bot age> years old."` + +Substitutions are declared the same way. 
A list of example substitutions is provided in the tutorial's `begin.rive` file, including: + +``` +! sub i'm = i am +! sub don't = do not +! sub what's = what is +``` + +**Redirect** sends a user input to a different response group with the at symbol (`@`): + +``` ++ [*] pineapple [*] +@ [*] bot [*] +``` + +In this example, any time a user enters the keyword `pineapple` (with optional words on either side), the bot is redirected to the group of responses for the keyword `bot`. + + +Substitutions and redirects are useful tools to allow the conversation to flow even when there are variations in speech by the users. They work so that you do not have to write duplicate versions of every user input for common variations on phrases. + + +**Previous/play along** lets the bot behave in a particular fashion if something was said more than one turn ago. Use the percentage (`%`) symbol to reference the line from prior exchanges. This lets your bot carry on a lengthier train of thought. How might this be useful in the context of a bot about bots, or for the topics your bot will discuss? + + +Try connecting your bot to APIs. What kinds of interactions do you want your bot to have, and what kinds of questions should it answer? For more dynamic interaction, you can consider adding `fetch` and API access (discussed in Tutorial 3), so that your bot can answer questions by updating its information from another website. For example, it could perform a Google search on your user's behalf. A non-AI bot that runs a Google search will use far fewer natural resources than the large language models used by tools like ChatGPT — and is much, much easier to program! + + +## Takeaways + +In comparison to how AI tools are presented, rule-based bots might look old-fashioned at first, but even the most complex tools do not always live up to expectations. They can be fragile and are limited to the constraints established by their platforms and the companies designing them. 
Writing your own dialogue for a chatbot lets you control the flow of conversation, as well as the visual interface and the methods of interaction. + +Making your own bots also contributes to understanding how systems (both AI and otherwise) interpret users' inputs. A lot of the AI systems we use now are complex human-computer systems, of which only a very small part requires any machine learning tasks. In your own bot, consider how little programming was needed to make an artificial conversation feel natural. Many of these capabilities existed in the first chatbot made by Joseph Weizenbaum in 1964, and artificial intelligence was envisioned as early as 1950 by Alan Turing. The current explosion of bots is not a new fad, but part of a long lineage of human-computer interaction. + +As you choose what tools to pick up, is AI necessary for making sophisticated work for all your projects? In which cases is it needed, in which cases will you customize your AI tools using p5.js, and in which cases will you opt out? + +## References + +Haller, Patrick, Ansar Aynetdinov, and Alan Akbik. 2024. "OpinionGPT: Modelling Explicit Biases in Instruction-Tuned LLMs." In *Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: System Demonstrations)*, edited by Kai-Wei Chang, Annie Lee, and Nazneen Rajani, 78-86. Mexico City, Mexico: Association for Computational Linguistics. [https://doi.org/10.18653/v1/2024.naacl-demo.8](https://doi.org/10.18653/v1/2024.naacl-demo.8). + +Shiffman, Daniel. The Coding Train, dir. 2017. 10.2: Chatbots with RiveScript - Programming with Text. [https://www.youtube.com/watch?v=wf8w1BJb9Xc](https://www.youtube.com/watch?v=wf8w1BJb9Xc). 
diff --git a/src/content/tutorials/images/featured/criticalAItutorial1.png b/src/content/tutorials/images/featured/criticalAItutorial1.png new file mode 100644 index 0000000000..bef79daeca Binary files /dev/null and b/src/content/tutorials/images/featured/criticalAItutorial1.png differ diff --git a/src/content/tutorials/images/featured/criticalAItutorial2.png b/src/content/tutorials/images/featured/criticalAItutorial2.png new file mode 100644 index 0000000000..d78e867980 Binary files /dev/null and b/src/content/tutorials/images/featured/criticalAItutorial2.png differ diff --git a/src/content/tutorials/images/featured/criticalAItutorial3.png b/src/content/tutorials/images/featured/criticalAItutorial3.png new file mode 100644 index 0000000000..f6f3b31653 Binary files /dev/null and b/src/content/tutorials/images/featured/criticalAItutorial3.png differ diff --git a/src/content/tutorials/images/featured/criticalAItutorial4.png b/src/content/tutorials/images/featured/criticalAItutorial4.png new file mode 100644 index 0000000000..0fec682f1d Binary files /dev/null and b/src/content/tutorials/images/featured/criticalAItutorial4.png differ