diff --git a/.gitignore b/.gitignore index a0eaf390..1b99171f 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,5 @@ examples.egg-info/ # worktrees .worktrees/ .env + +bird-search-example-main \ No newline at end of file diff --git a/docs/full-text-search.ipynb b/docs/full-text-search.ipynb index 9982be7a..ae77af62 100644 --- a/docs/full-text-search.ipynb +++ b/docs/full-text-search.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "title", + "id": "0", "metadata": {}, "source": [ "# Bird Search — Pinecone Full-Text Search\n", @@ -28,12 +28,12 @@ }, { "cell_type": "markdown", - "id": "c92bce7e", + "id": "1", "metadata": {}, "source": [ "## Contents\n", "\n", - "- [0. Install the SDK](#0-install-the-sdk)\n", + "- [0. Install the SDK and other dependencies](#0-install-the-sdk-and-other-dependencies)\n", "- [1. Connect to the index](#1-connect-to-the-index)\n", "- [2. Create index and load data](#2-create-index-and-load-data)\n", "- [3. Display helper](#3-display-helper)\n", @@ -57,18 +57,18 @@ }, { "cell_type": "markdown", - "id": "install-header", + "id": "2", "metadata": {}, "source": [ - "## 0. Install the SDK\n", + "## 0. Install the SDK and other dependencies\n", "\n", "> ⚠️ Because this feature is in public preview, it uses the `preview` namespace. `pc.preview.*` targets Pinecone API version `2026-01.alpha`. Pin your SDK version when relying on it." 
] }, { "cell_type": "code", - "execution_count": 85, - "id": "install", + "execution_count": 26, + "id": "3", "metadata": {}, "outputs": [ { @@ -80,27 +80,38 @@ } ], "source": [ - "try:\n", - " import google.colab\n", - " IN_COLAB = True\n", - "except ImportError:\n", - " IN_COLAB = False\n", - "\n", "%pip install --quiet \\\n", - " \"pinecone==9.0.0rc1\" \\\n", + " \"pinecone==9.0.0\" \\\n", " \"pinecone-notebooks==0.1.1\" \\\n", - " \"httpx[http2]==0.28.1\" \\\n", - " \"msgspec==0.21.1\" \\\n", - " \"orjson==3.11.8\" \\\n", " \"tqdm==4.67.3\" \\\n", " \"google-genai==1.73.1\" \\\n", " \"python-dotenv==1.2.2\" \\\n", - " \"pillow==12.2.0\"" + " \"pillow==12.2.0\"\n", + "\n", + "import importlib.util\n", + "import json\n", + "import os\n", + "import textwrap\n", + "import time\n", + "import urllib.request\n", + "import zipfile\n", + "from getpass import getpass\n", + "from pathlib import Path\n", + "\n", + "import pinecone\n", + "from dotenv import load_dotenv\n", + "from google import genai\n", + "from google.genai import types\n", + "from PIL import Image\n", + "from pinecone.preview import SchemaBuilder\n", + "from tqdm import tqdm\n", + "\n", + "IN_COLAB = importlib.util.find_spec(\"google.colab\") is not None" ] }, { "cell_type": "markdown", - "id": "connect-header", + "id": "4", "metadata": {}, "source": [ "## 1. 
Connect to the index\n", @@ -112,17 +123,14 @@ }, { "cell_type": "code", - "execution_count": 86, - "id": "connect", + "execution_count": 27, + "id": "5", "metadata": {}, "outputs": [], "source": [ - "import os\n", - "from getpass import getpass\n", - "import pinecone\n", - "from dotenv import load_dotenv\n", "load_dotenv() # loads .env into os.environ; no-op if file absent\n", "\n", + "\n", "def get_pinecone_api_key():\n", " \"\"\"Get Pinecone API key from environment, Colab auth, or prompt.\n", "\n", @@ -134,6 +142,7 @@ " if api_key is None:\n", " try:\n", " from pinecone_notebooks.colab import Authenticate\n", + "\n", " Authenticate()\n", " api_key = os.environ.get(\"PINECONE_API_KEY\")\n", " except ImportError:\n", @@ -143,12 +152,13 @@ "\n", " return api_key\n", "\n", + "\n", "PINECONE_API_KEY = get_pinecone_api_key()" ] }, { "cell_type": "markdown", - "id": "27fc682f", + "id": "6", "metadata": {}, "source": [ "### Setup Google API key" @@ -156,8 +166,8 @@ }, { "cell_type": "code", - "execution_count": 87, - "id": "32d9b399", + "execution_count": 28, + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -172,12 +182,13 @@ "\n", " return api_key\n", "\n", - "GOOGLE_API_KEY = get_google_api_key()" + "\n", + "GOOGLE_API_KEY = get_google_api_key()" ] }, { "cell_type": "markdown", - "id": "60124535", + "id": "8", "metadata": {}, "source": [ "### Initialize the client and connect" @@ -185,8 +196,8 @@ }, { "cell_type": "code", - "execution_count": 88, - "id": "connect-pinecone", + "execution_count": 29, + "id": "9", "metadata": {}, "outputs": [ { @@ -206,7 +217,7 @@ ")\n", "\n", "INDEX_NAME = \"bird-search-fts\"\n", - "NAMESPACE = \"birds\" # namespaces partition a single index — useful for isolating datasets or tenants\n", + "NAMESPACE = \"birds\" # namespaces partition a single index — useful for isolating datasets or tenants\n", "\n", "print(f\"SDK version : {pinecone.__version__}\")\n", "print(f\"Running in : {'Colab' if IN_COLAB else 'local Jupyter'}\")\n", 
@@ -219,9 +230,8 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "load-data-header", + "id": "10", "metadata": {}, "source": [ "## 2. Create index and load data\n", @@ -233,42 +243,46 @@ }, { "cell_type": "code", - "execution_count": 71, - "id": "corpus-download", + "execution_count": 30, + "id": "11", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "DATA_DIR: /Users/jenna/Development/playground/fts/bird-semantic-search-main/parsed_birds\n" + "DATA_DIR: /Users/jenna/Development/pinecone-io/examples/docs/bird-search-example-main/parsed_birds\n" ] } ], "source": [ - "import json, os, pathlib, time, urllib.request, zipfile\n", - "\n", - "if not pathlib.Path(\"bird-semantic-search-main\").exists():\n", + "if not Path(\"bird-search-example-main\").exists():\n", " print(\"Downloading bird corpus...\")\n", " urllib.request.urlretrieve(\n", - " \"https://github.com/pinecone-io/bird-semantic-search/archive/refs/heads/main.zip\",\n", + " \"https://github.com/pinecone-io/bird-search-example/archive/refs/heads/main.zip\",\n", " \"main.zip\",\n", " )\n", " with zipfile.ZipFile(\"main.zip\") as zf:\n", " zf.extractall()\n", - " pathlib.Path(\"main.zip\").unlink()\n", + " Path(\"main.zip\").unlink()\n", " print(\"Done.\")\n", "\n", - "DATA_DIR = pathlib.Path(os.environ.get(\"BIRD_DATA_DIR\", \"bird-semantic-search-main/parsed_birds\")).expanduser().resolve()\n", - "N_BIRDS = 200\n", - "EMBED_DIM = 768\n", + "DATA_DIR = (\n", + " Path(\n", + " os.environ.get(\"BIRD_DATA_DIR\", \"bird-search-example-main/parsed_birds\")\n", + " )\n", + " .expanduser()\n", + " .resolve()\n", + ")\n", + "N_BIRDS = 200\n", + "EMBED_DIM = 768\n", "GEMINI_MODEL = \"gemini-embedding-2\"\n", "print(f\"DATA_DIR: {DATA_DIR}\")" ] }, { "cell_type": "markdown", - "id": "index-setup-header", + "id": "12", "metadata": {}, "source": [ "### Create index and load data\n", @@ -278,34 +292,20 @@ }, { "cell_type": "code", - "execution_count": 72, - "id": 
"index-setup", + "execution_count": null, + "id": "13", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connected to: bird-search-fts\n" - ] - } - ], + "outputs": [], "source": [ - "from PIL import Image\n", - "from tqdm import tqdm\n", - "from google.genai import types as genai_types\n", - "from pinecone.preview import SchemaBuilder\n", - "from google import genai as _genai\n", - "\n", - "gem_loader = _genai.Client(api_key=GOOGLE_API_KEY)\n", - "EMBED_CONFIG = genai_types.EmbedContentConfig(output_dimensionality=EMBED_DIM)\n", + "gem_loader = genai.Client(api_key=GOOGLE_API_KEY)\n", + "EMBED_CONFIG = types.EmbedContentConfig(output_dimensionality=EMBED_DIM)\n", "\n", "if not pc.preview.indexes.exists(INDEX_NAME):\n", " schema = (\n", " SchemaBuilder()\n", " .add_string_field(\"bird_name\", full_text_search={\"language\": \"en\"})\n", - " .add_string_field(\"intro\", full_text_search={\"language\": \"en\"})\n", - " .add_string_field(\"body\", full_text_search={\"language\": \"en\", \"stemming\": True})\n", + " .add_string_field(\"intro\", full_text_search={\"language\": \"en\"})\n", + " .add_string_field(\"body\", full_text_search={\"language\": \"en\", \"stemming\": True})\n", " .add_dense_vector_field(\"image_embedding\", dimension=EMBED_DIM, metric=\"cosine\")\n", " .build()\n", " )\n", @@ -322,58 +322,17 @@ }, { "cell_type": "code", - "execution_count": 30, - "id": "load-data-ingest", + "execution_count": null, + "id": "14", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading 200 birds from /Users/jenna/Development/playground/fts/bird-semantic-search-main/parsed_birds\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Embedding images: 100%|██████████| 200/200 [02:09<00:00, 1.55it/s]\n", - "/Users/jenna/Development/playground/fts/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. 
Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Built 200 documents.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Upserting: 100%|██████████| 4/4 [00:01<00:00, 3.47batch/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded 150 / 200 documents\n", - "Waiting for documents to be indexed...\n", - " not yet indexed, retrying...\n", - " not yet indexed, retrying...\n", - "Data is searchable — ready to query.\n" - ] - } - ], + "outputs": [], "source": [ "# ── Load metadata + filter usable birds ───────────────────────────────────────\n", "meta = json.loads((DATA_DIR / \"parsing_metadata.json\").read_text())\n", "\n", "slugs = [\n", - " slug for slug, entry in meta.items()\n", + " slug\n", + " for slug, entry in meta.items()\n", " if (DATA_DIR / \"text\" / entry.get(\"text_file\", \"\")).exists()\n", " and entry.get(\"images\")\n", " and (DATA_DIR / \"images\" / entry[\"images\"][0][\"local_path\"]).exists()\n", @@ -382,6 +341,7 @@ "\n", "print(f\"Loading {len(slugs)} birds from {DATA_DIR}\")\n", "\n", + "\n", "# ── Helper: split article text into intro + body ───────────────────────────────\n", "def split_intro_body(text):\n", " paragraphs = [p.strip() for p in text.split(\"\\n\\n\") if p.strip()]\n", @@ -389,28 +349,33 @@ " return \"\", \"\"\n", " return paragraphs[0], \"\\n\\n\".join(paragraphs[1:])\n", "\n", + "\n", "# ── Embed images + build documents ────────────────────────────────────────────\n", "docs = []\n", "for slug in tqdm(slugs, desc=\"Embedding images\"):\n", - " entry = meta[slug]\n", + " entry = meta[slug]\n", " img_path = DATA_DIR / \"images\" / entry[\"images\"][0][\"local_path\"]\n", " with Image.open(img_path) as img:\n", " img.load()\n", " resp = gem_loader.models.embed_content(\n", - " 
model=GEMINI_MODEL, contents=img, config=EMBED_CONFIG,\n", + " model=GEMINI_MODEL,\n", + " contents=img,\n", + " config=EMBED_CONFIG,\n", " )\n", " image_embedding = list(resp.embeddings[0].values)\n", "\n", " text = (DATA_DIR / \"text\" / entry[\"text_file\"]).read_text(encoding=\"utf-8\")\n", " intro, body = split_intro_body(text)\n", "\n", - " docs.append({\n", - " \"_id\": slug,\n", - " \"bird_name\": slug.replace(\"_\", \" \"),\n", - " \"intro\": intro,\n", - " \"body\": body,\n", - " \"image_embedding\": image_embedding,\n", - " })\n", + " docs.append(\n", + " {\n", + " \"_id\": slug,\n", + " \"bird_name\": slug.replace(\"_\", \" \"),\n", + " \"intro\": intro,\n", + " \"body\": body,\n", + " \"image_embedding\": image_embedding,\n", + " }\n", + " )\n", "\n", "print(f\"Built {len(docs)} documents.\")\n", "\n", @@ -442,7 +407,7 @@ }, { "cell_type": "markdown", - "id": "751d2f80", + "id": "15", "metadata": {}, "source": [ "Note: it is expected to have some failures in the batch upsert as some of the document bodies will exceed the max token count. Expect to see `Uploaded 150 / 200 documents`." @@ -450,7 +415,7 @@ }, { "cell_type": "markdown", - "id": "display-helper-header", + "id": "16", "metadata": {}, "source": [ "## 3. Display helper\n", @@ -460,21 +425,19 @@ }, { "cell_type": "code", - "execution_count": 89, - "id": "display-helper", + "execution_count": 31, + "id": "17", "metadata": {}, "outputs": [], "source": [ - "import textwrap\n", - "from pathlib import Path\n", - "\n", "def show_results(response, snippet_field=\"intro\", max_lines=10, image_dir=None):\n", " \"\"\"Print search results with score, bird name, and a text snippet. 
Pass max_lines=-1 for full field.\"\"\"\n", " if not response.matches:\n", " print(\"(no matches)\")\n", " return\n", " if image_dir is not None:\n", - " from IPython.display import display, Image as IPImage\n", + " from IPython.display import Image as IPImage\n", + " from IPython.display import display\n", " for doc in response.matches:\n", " name = doc.get(\"bird_name\") or doc._id\n", " print(f\"Score {doc.score:.4f} [{doc._id}] {name}\")\n", @@ -486,22 +449,27 @@ " if snippet:\n", " if max_lines == -1:\n", " wrapped = textwrap.fill(\n", - " snippet.strip(), width=88,\n", - " initial_indent=\" \", subsequent_indent=\" \",\n", + " snippet.strip(),\n", + " width=88,\n", + " initial_indent=\" \",\n", + " subsequent_indent=\" \",\n", " )\n", " else:\n", " wrapped = textwrap.fill(\n", - " snippet.strip(), width=88,\n", - " initial_indent=\" \", subsequent_indent=\" \",\n", - " max_lines=max_lines, placeholder=\" …\",\n", + " snippet.strip(),\n", + " width=88,\n", + " initial_indent=\" \",\n", + " subsequent_indent=\" \",\n", + " max_lines=max_lines,\n", + " placeholder=\" …\",\n", " )\n", " print(wrapped)\n", - " print()\n" + " print()" ] }, { "cell_type": "markdown", - "id": "tokens-explainer", + "id": "18", "metadata": {}, "source": [ "## How tokens work\n", @@ -523,7 +491,7 @@ }, { "cell_type": "markdown", - "id": "search-header", + "id": "19", "metadata": {}, "source": [ "## 4. Single-term token match — `\"migration\"`\n", @@ -535,8 +503,8 @@ }, { "cell_type": "code", - "execution_count": 90, - "id": "search-body", + "execution_count": 32, + "id": "20", "metadata": {}, "outputs": [ { @@ -921,7 +889,7 @@ }, { "cell_type": "markdown", - "id": "step4-name-header", + "id": "21", "metadata": {}, "source": [ "## 5. 
Searching `bird_name` — and blending multiple fields\n", @@ -942,38 +910,10 @@ }, { "cell_type": "code", - "execution_count": 33, - "id": "step4-name-only", + "execution_count": null, + "id": "22", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Searching bird_name only for \"sparrow\":\n", - "\n", - "Score 3.1787 [Bachman%27s_sparrow] Bachman%27s sparrow\n", - " Peucaea aestivalis aestivalis Peucaea aestivalis bachmani Peucaea aestivalis\n", - " illinoensis\n", - "\n", - "Score 3.1787 [Baird%27s_sparrow] Baird%27s sparrow\n", - " Baird's sparrow (Centronyx bairdii) is a species of North American birds in the\n", - " family Passerellidae of order Passeriformes. It is a migratory bird native to the\n", - " United States, Canada, and Mexico.\n", - "\n", - "Score 3.1787 [Black-chested_sparrow] Black-chested sparrow\n", - " Aimophila humeralis\n", - "\n", - "Score 3.1787 [Black-chinned_sparrow] Black-chinned sparrow\n", - " Spinites atrogularis Cabanis, 1851 Struthus atrimentalis Couch, 1854 Spizella evura\n", - " Coues, 1866 Spizella atrigularis Salvin & Godman, 1886\n", - "\n", - "Score 3.1787 [American_tree_sparrow] American tree sparrow\n", - " Spizella monticolaSpizella arboreaPasserella arborea\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# ── Single-field search: bird_name only ────────────────────────────────────\n", "QUERY = \"sparrow\"\n", @@ -991,7 +931,7 @@ }, { "cell_type": "markdown", - "id": "step4-blend-header", + "id": "23", "metadata": {}, "source": [ "Name-only returns just 5 results — every bird with \"sparrow\" in its formal\n", @@ -1007,43 +947,10 @@ }, { "cell_type": "code", - "execution_count": 34, - "id": "step4-blend", + "execution_count": null, + "id": "24", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Blending bird_name + intro + body for \"sparrow\":\n", - "\n", - "Score 12.3510 [Baird%27s_sparrow] Baird%27s sparrow\n", - " Baird's 
sparrow (Centronyx bairdii) is a species of North American birds in the\n", - " family Passerellidae of order Passeriformes. It is a migratory bird native to the\n", - " United States, Canada, and Mexico.\n", - "\n", - "Score 9.9170 [Ammospiza_maritima_mirabilis] Ammospiza maritima mirabilis\n", - " The Cape Sable seaside sparrow (Ammospiza maritima mirabilis) is a subspecies of the\n", - " seaside sparrow, a species of bird in the family Passerellidae native to the United\n", - " States. This subspecies is endemic to southern Florida. It is designated endangered\n", - " under the Endangered Species Act.\n", - "\n", - "Score 8.3378 [Bachman%27s_sparrow] Bachman%27s sparrow\n", - " Peucaea aestivalis aestivalis Peucaea aestivalis bachmani Peucaea aestivalis\n", - " illinoensis\n", - "\n", - "Score 8.1409 [Black-chinned_sparrow] Black-chinned sparrow\n", - " Spinites atrogularis Cabanis, 1851 Struthus atrimentalis Couch, 1854 Spizella evura\n", - " Coues, 1866 Spizella atrigularis Salvin & Godman, 1886\n", - "\n", - "Score 8.1117 [American_tree_sparrow] American tree sparrow\n", - " Spizella monticolaSpizella arboreaPasserella arborea\n", - "\n", - "IDs that appear in blended results but not in name-only:\n", - " [Ammospiza_maritima_mirabilis] Ammospiza maritima mirabilis (score 9.9170)\n" - ] - } - ], + "outputs": [], "source": [ "# ── Blended search: bird_name + intro + body ───────────────────────────────\n", "response_blend = idx.documents.search(\n", @@ -1051,8 +958,8 @@ " top_k=5,\n", " score_by=[\n", " {\"type\": \"text\", \"field\": \"bird_name\", \"query\": QUERY},\n", - " {\"type\": \"text\", \"field\": \"intro\", \"query\": QUERY},\n", - " {\"type\": \"text\", \"field\": \"body\", \"query\": QUERY},\n", + " {\"type\": \"text\", \"field\": \"intro\", \"query\": QUERY},\n", + " {\"type\": \"text\", \"field\": \"body\", \"query\": QUERY},\n", " ],\n", " include_fields=[\"bird_name\", \"intro\"],\n", ")\n", @@ -1061,7 +968,7 @@ "show_results(response_blend, 
\"intro\")\n", "\n", "# Show which IDs appear in blend but not in name-only search\n", - "name_ids = {d._id for d in response_name.matches}\n", + "name_ids = {d._id for d in response_name.matches}\n", "blend_ids = {d._id for d in response_blend.matches}\n", "new_in_blend = blend_ids - name_ids\n", "if new_in_blend:\n", @@ -1075,7 +982,7 @@ }, { "cell_type": "markdown", - "id": "step5-header", + "id": "25", "metadata": {}, "source": [ "## 6. Introducing Lucene syntax: single-term `query_string`\n", @@ -1089,59 +996,10 @@ }, { "cell_type": "code", - "execution_count": 54, - "id": "step5-code", + "execution_count": null, + "id": "26", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "query_string body:(migration)\n", - "\n", - "Score 1.9391 [Bachman%27s_warbler] Bachman%27s warbler\n", - " This bird was first recorded in 1832 by the Reverend John Bachman, who found the\n", - " species near Charleston, South Carolina, and presented study skins and descriptions\n", - " to his friend and collaborator, John James Audubon. Audubon never saw the bird alive\n", - " but named it in honor of Bachman in 1833. An alternate common name of the species\n", - " used by some 19th-century authors, paralleling similar names given to other species\n", - " once placed in the genus Helinaia, is Bachman's swamp warbler. The blue-winged …\n", - "\n", - "Score 1.8468 [Antillean_nighthawk] Antillean nighthawk\n", - " Its specific epithet, gundlachii, is in honor of Cuban naturalist Juan Gundlach.\n", - " The adults are dark with brown, grey and white patterning on the upperparts and\n", - " breast; the long wings are black and show a white bar in flight. The tail is dark\n", - " with white barring; the underparts are white with black bars. The adult male has a\n", - " white throat; the female has a light brown throat. 
The most distinguishing\n", - " characteristic to determine its identity from its closest relative the common …\n", - "\n", - "Score 1.8044 [American_woodcock] American woodcock\n", - " The American woodcock is the only species of woodcock inhabiting North America.\n", - " Although classified with the sandpipers and shorebirds in the family Scolopacidae,\n", - " the American woodcock lives mainly in upland settings. Its many folk names include\n", - " timberdoodle, bogsucker, night partridge, brush snipe, hokumpoke, and becasse. The\n", - " population of the American woodcock has fallen by an average of slightly more than\n", - " 1% annually since the 1960s. Most authorities attribute this decline to a loss of …\n", - "\n", - "Score 1.7618 [American_dusky_flycatcher] American dusky flycatcher\n", - " The dusky flycatcher is one of many species in the genus Empidonax. These species\n", - " are very similar in appearance and behavior, and they are notoriously difficult to\n", - " differentiate. The best characteristics for distinguishing these species are voice,\n", - " breeding habitat, and range. Adults have olive-gray upperparts, darker on the wings\n", - " and tail, with whitish underparts; they have a noticeable medium-width white eye\n", - " ring, white wing bars and a medium length tail. The breast is washed with olive- …\n", - "\n", - "Score 1.6152 [Baltimore_oriole] Baltimore oriole\n", - " The Baltimore oriole is the state bird of Maryland, and the namesake and mascot for\n", - " the Baltimore Orioles baseball team. The Baltimore oriole was formally described in\n", - " 1758 by the Swedish naturalist Carl Linnaeus in the tenth edition of his Systema\n", - " Naturae under the binomial name Coracias galbula. He specified the type locality as\n", - " America, but this was restricted to Virginia in 1931. 
Linnaeus based his account on\n", - " the \"Baltimore-Bird\" that had been described and illustrated by the English …\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# ── query_string — single term, equivalent to type:text ────────────────────────\n", "response_qs = idx.documents.search(\n", @@ -1151,13 +1009,13 @@ " include_fields=[\"bird_name\", \"body\"],\n", ")\n", "\n", - "print('query_string body:(migration)\\n')\n", + "print(\"query_string body:(migration)\\n\")\n", "show_results(response_qs, \"body\", 6)" ] }, { "cell_type": "markdown", - "id": "step6-header", + "id": "27", "metadata": {}, "source": [ "## 7. Requiring both terms: AND operator\n", @@ -1172,75 +1030,13 @@ }, { "cell_type": "code", - "execution_count": 36, - "id": "step6-code", + "execution_count": null, + "id": "28", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type:text \"aquatic diving\" (OR — either term)\n", - "\n", - "Score 5.5609 [Arctic_loon] Arctic loon\n", - " The black-throated loon (Gavia arctica), also known as the Arctic loon and the\n", - " black-throated diver, is a migratory aquatic bird found in the northern hemisphere,\n", - " primarily breeding in freshwater lakes in northern Europe and Asia. It winters along\n", - " sheltered, ice-free coasts of the north-east Atlantic Ocean and the eastern and …\n", - "\n", - "Score 4.3182 [American_white_pelican] American white pelican\n", - " The American white pelican (Pelecanus erythrorhynchos) is a large aquatic soaring\n", - " bird from the order Pelecaniformes. It breeds in interior North America, moving\n", - " south and to the coasts, as far as Costa Rica, in winter. The American white\n", - " pelican was formally described in 1789 by the German naturalist Johann Friedrich …\n", - "\n", - "Score 4.2712 [American_coot] American coot\n", - " The American coot (Fulica americana), also known as a mud hen or pouldeau, is a bird\n", - " of the family Rallidae. 
Though commonly mistaken for ducks, American coots are only\n", - " distantly related to ducks, belonging to a separate order. Unlike the webbed feet of\n", - " ducks, coots have broad, lobed scales on their lower legs and toes that fold back …\n", - "\n", - "Score 3.8841 [Black-capped_donacobius] Black-capped donacobius\n", - " The black-capped donacobius (Donacobius atricapilla) is a conspicuous, vocal South\n", - " American bird. It is distributed across the northern half of South America. In 1760\n", - " the French zoologist Mathurin Jacques Brisson included a description of the black-\n", - " capped donacobius in his Ornithologie based on a specimen that he mistakenly …\n", - "\n", - "Score 3.2792 [Anhinga] Anhinga\n", - " Plotus anhinga Linnaeus, 1766 The anhinga (/ænˈhɪŋɡə/; Anhinga anhinga), sometimes\n", - " called snakebird, darter, American darter, or water turkey, is a water bird of the\n", - " warmer parts of the Americas. The word anhinga comes from a'ñinga in the Brazilian\n", - " Tupi language and means \"devil bird\" or \"snake bird\". The origin of the name is …\n", - "\n", - "query_string body:(aquatic AND diving) (AND — both terms required)\n", - "\n", - "Score 5.5609 [Arctic_loon] Arctic loon\n", - " The black-throated loon (Gavia arctica), also known as the Arctic loon and the\n", - " black-throated diver, is a migratory aquatic bird found in the northern hemisphere,\n", - " primarily breeding in freshwater lakes in northern Europe and Asia. It winters along\n", - " sheltered, ice-free coasts of the north-east Atlantic Ocean and the eastern and …\n", - "\n", - "Score 4.3182 [American_white_pelican] American white pelican\n", - " The American white pelican (Pelecanus erythrorhynchos) is a large aquatic soaring\n", - " bird from the order Pelecaniformes. It breeds in interior North America, moving\n", - " south and to the coasts, as far as Costa Rica, in winter. 
The American white\n", - " pelican was formally described in 1789 by the German naturalist Johann Friedrich …\n", - "\n", - "Score 4.2712 [American_coot] American coot\n", - " The American coot (Fulica americana), also known as a mud hen or pouldeau, is a bird\n", - " of the family Rallidae. Though commonly mistaken for ducks, American coots are only\n", - " distantly related to ducks, belonging to a separate order. Unlike the webbed feet of\n", - " ducks, coots have broad, lobed scales on their lower legs and toes that fold back …\n", - "\n", - "In OR results but not AND (likely has only one term):\n", - " [Black-capped_donacobius] Black-capped donacobius\n", - " [Anhinga] Anhinga\n" - ] - } - ], + "outputs": [], "source": [ "# ── OR (type:text) vs AND (query_string) ────────────────────────────────────\n", - "QUERY_OR = \"aquatic diving\"\n", + "QUERY_OR = \"aquatic diving\"\n", "QUERY_AND = \"body:(aquatic AND diving)\"\n", "\n", "response_or = idx.documents.search(\n", @@ -1260,10 +1056,10 @@ "print(f'type:text \"{QUERY_OR}\" (OR — either term)\\n')\n", "show_results(response_or, \"body\", 4)\n", "\n", - "print(f'query_string {QUERY_AND} (AND — both terms required)\\n')\n", + "print(f\"query_string {QUERY_AND} (AND — both terms required)\\n\")\n", "show_results(response_and, \"body\", 4)\n", "\n", - "or_ids = {d._id for d in response_or.matches}\n", + "or_ids = {d._id for d in response_or.matches}\n", "and_ids = {d._id for d in response_and.matches}\n", "or_only = or_ids - and_ids\n", "if or_only:\n", @@ -1275,7 +1071,7 @@ }, { "cell_type": "markdown", - "id": "step7-header", + "id": "29", "metadata": {}, "source": [ "## 8. 
Excluding terms: NOT operator\n", @@ -1288,55 +1084,10 @@ }, { "cell_type": "code", - "execution_count": 55, - "id": "step7-code", + "execution_count": null, + "id": "30", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "query_string body:(raptor NOT owl)\n", - "\n", - "Score 4.1918 [American_kestrel] American kestrel\n", - " The American kestrel usually hunts in energy-conserving fashion by perching and\n", - " scanning the ground for prey to ambush, though it also hunts from the air. It\n", - " sometimes hovers in the air with rapid wing beats while homing in on prey. Its diet\n", - " typically consists of grasshoppers and other insects, lizards, mice, and small birds\n", - " (e.g. sparrows). This broad diet has contributed to its wide success as a species.\n", - " It nests in cavities in trees, cliffs, buildings, and other structures. The female\n", - " lays three to seven eggs, which both sexes help to incubate. Its breeding range\n", - " extends from central and western Alaska across northern Canada to Nova Scotia, and\n", - " south throughout North America, into central Mexico and the Caribbean. It is a local\n", - " breeder in Central America and is widely distributed throughout South America. …\n", - "\n", - "Score 2.8799 [American_black_vulture] American black vulture\n", - " The black vulture (Coragyps atratus), also known as the American black vulture,\n", - " Mexican vulture, zopilote, urubu, or gallinazo, is a bird in the New World vulture\n", - " family whose range extends from the southeastern United States to Peru, Central\n", - " Chile and Uruguay in South America. Although a common and widespread species, it has\n", - " a somewhat more restricted distribution than its compatriot, the turkey vulture,\n", - " which breeds well into Canada and all the way south to Tierra del Fuego. 
It is the\n", - " only extant member of the genus Coragyps, which is in the family Cathartidae.\n", - " Despite the similar name and appearance, this species is not closely related to the\n", - " Eurasian black vulture, an Old World vulture, of the family Accipitridae (which\n", - " includes raptors like the eagles, hawks, kites, and harriers). For ease of …\n", - "\n", - "Score 1.2136 [African_sacred_ibis] African sacred ibis\n", - " The African sacred ibis (Threskiornis aethiopicus) is a species of ibis, a wading\n", - " bird of the family Threskiornithidae. It is native to much of Africa, as well as\n", - " small parts of Iraq, Iran and Kuwait. It is especially known for its role in Ancient\n", - " Egyptian religion, where it was linked to the god Thoth. The species is currently\n", - " extirpated from Egypt. It is very closely related to the black-headed ibis and the\n", - " Australian white ibis, with which it forms a superspecies complex, so much so that\n", - " the three species are considered conspecific by some ornithologists. In mixed flocks\n", - " these ibises often hybridise. The Australian white ibis is often called the sacred\n", - " ibis colloquially. Although known to the ancient civilisations of Greece, Rome and\n", - " especially Africa, ibises were unfamiliar to western Europeans from the fall of …\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# ── NOT: raptors excluding owls ───────────────────────────────────────────\n", "response_not = idx.documents.search(\n", @@ -1346,13 +1097,13 @@ " include_fields=[\"bird_name\", \"body\"],\n", ")\n", "\n", - "print('query_string body:(raptor NOT owl)\\n')\n", + "print(\"query_string body:(raptor NOT owl)\\n\")\n", "show_results(response_not, \"body\")" ] }, { "cell_type": "markdown", - "id": "step8-header", + "id": "31", "metadata": {}, "source": [ "## 9. Exact phrase vs. 
token OR in `bird_name`\n", @@ -1371,49 +1122,16 @@ }, { "cell_type": "code", - "execution_count": 38, - "id": "step8-code", + "execution_count": null, + "id": "32", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Token OR bird_name:(crested hummingbird)\n", - "\n", - "Score 6.1322 [Antillean_crested_hummingbird] Antillean crested hummingbird\n", - " Trochilus cristatus Linnaeus, 1758\n", - "\n", - "Score 3.3712 [Black-crested_antshrike] Black-crested antshrike\n", - " Lanius canadensis Linnaeus, 1766\n", - "\n", - "Score 3.3712 [Black-crested_coquette] Black-crested coquette\n", - " Ornismya helenae\n", - "\n", - "Score 3.3712 [Black-crested_titmouse] Black-crested titmouse\n", - " The black-crested titmouse or Mexican titmouse (Baeolophus atricristatus), is a\n", - " passerine bird in the tit family Paridae. Once considered a subspecies of the tufted\n", - " titmouse (B. bicolor), it was recognized as a separate species in 2002. It is native\n", - " to southern Texas, Oklahoma, and east-central Mexico. Vagrants have been seen as far\n", - " north and east as St. 
Louis, Missouri.\n", - "\n", - "Score 3.2266 [Amazilia_hummingbird] Amazilia hummingbird\n", - " Amazilia amazilia\n", - "\n", - "Exact phrase bird_name:(\"crested hummingbird\")\n", - "\n", - "Score 6.1322 [Antillean_crested_hummingbird] Antillean crested hummingbird\n", - " Trochilus cristatus Linnaeus, 1758\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# ── Token OR vs exact phrase in bird_name ───────────────────────────────────\n", "response_tokens = idx.documents.search(\n", " namespace=NAMESPACE,\n", " top_k=5,\n", - " score_by=[{\"type\": \"query_string\", \"query\": 'bird_name:(crested hummingbird)'}],\n", + " score_by=[{\"type\": \"query_string\", \"query\": \"bird_name:(crested hummingbird)\"}],\n", " include_fields=[\"bird_name\", \"intro\"],\n", ")\n", "\n", @@ -1424,7 +1142,7 @@ " include_fields=[\"bird_name\", \"intro\"],\n", ")\n", "\n", - "print('Token OR bird_name:(crested hummingbird)\\n')\n", + "print(\"Token OR bird_name:(crested hummingbird)\\n\")\n", "show_results(response_tokens, \"intro\")\n", "\n", "print('Exact phrase bird_name:(\"crested hummingbird\")\\n')\n", @@ -1433,7 +1151,7 @@ }, { "cell_type": "markdown", - "id": "step9-header", + "id": "33", "metadata": {}, "source": [ "## 10. Phrase proximity (slop): flexible phrase matching in `body`\n", @@ -1454,593 +1172,10 @@ }, { "cell_type": "code", - "execution_count": 39, - "id": "step9-code", + "execution_count": null, + "id": "34", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Strict phrase body:(\"nest colony\")\n", - "\n", - "Score 2.7594 [Ashy_storm-petrel] Ashy storm-petrel\n", - " The ashy storm petrel was first described by American ornithologist Elliott Coues in\n", - " 1864. Both its common and scientific name, homochroa, \"uniformly colored\", from\n", - " Ancient Greek (h)omoia (όμοια), \"alike\" + \"chroma\" (χρώμα) \"color\", come from its\n", - " coloration. 
It was formerly defined in the genus Oceanodroma before that genus was\n", - " synonymized with Hydrobates. This is a small, uniformly sooty-brown storm petrel\n", - " with a forked tail, closely resembling the black storm petrel, but it is smaller and\n", - " has a more fluttering style of flight, with the upstroke only becoming horizontal to\n", - " the body before beginning the downstroke (other storm petrels in its range have a\n", - " higher upstroke). It is a gregarious bird at sea, feeding nocturnally on\n", - " cephalopods, fish (particularly the deep-sea myctophids, which rise to the sea's\n", - " surface at night) and euphausiid krill such as Thysanoessa spinifera, which also\n", - " swarm at the surface. They also attend fishing vessels for the fish oils released\n", - " when the nets are pulled. Ashy storm petrels nest in rock burrows on offshore\n", - " islands, returning to the nests at night. The species has a long breeding cycle,\n", - " laying eggs in May and fledging in October, although timing varies greatly, more so\n", - " than in most other storm petrels; some pairs may have a chick that is half grown\n", - " when other pairs are still laying. Like in many other seabirds, pairs show both mate\n", - " and site fidelity, mating in the same pair with the same mate for many years and\n", - " nesting at the same burrow, despite the pairs spending their lives out of the\n", - " breeding season separate from each other, and despite the fact that many individuals\n", - " might seem to compete for burrows at the nesting colonies. A change in mate is\n", - " usually associated with a change in nesting site. The ashy storm petrel is a long-\n", - " lived bird; a banded individual has lived at least 31 years. Ashy storm petrels\n", - " breed on 17 islands in the northeast Pacific, principally off the coast of\n", - " California, but including a few sites off the coast of northwestern Mexico. 
Half the\n", - " world's population nests on the Farallon Islands near San Francisco. Other breeding\n", - " islands include the eight Channel Islands of California and a small population on\n", - " Mexico's Coronados. Bat Cave, on the north side of Santa Cruz Island in Southern\n", - " California, has the largest nesting colony for the ashy storm petrel in the world,\n", - " with over 100 nests. Outside of the breeding season, it is believed to be more\n", - " widely distributed, foraging on the California Current, but it undertakes no large\n", - " migration and does not range as far as other species of storm petrels. In the early\n", - " fall, large flocks can be seen in Monterey Bay. The birds do not range inland any\n", - " significant distance except when storm-blown; for example, a sighting in San Mateo\n", - " County, California was considered \"unusual\" by an experienced naturalist. The world\n", - " population is estimated to be around 10,000 birds, 8,000 of them breeders, with the\n", - " Farallon population having declined by one-third between 1972 and 1992. The ashy\n", - " storm petrel is designated as a species of conservation concern in California. It is\n", - " threatened by western gull and burrowing owl predation, illumination from fishing\n", - " boats, introduced predators such as rats and feral cats, and pollution. Most of the\n", - " islands where it breeds are covered by some degree of protection. Global warming\n", - " could have a profound impact on ashy storm petrels. Future changes to coastal\n", - " California waters due to global warming could result in warmer, less productive\n", - " waters, which would mean less food would be available for the petrels. In addition,\n", - " ocean acidification may result in a decline of crustacean prey species due to the\n", - " effects that excess CO2 will have on the animals' shells. 
Sea-level rise will also\n", - " threaten certain nesting sites that would be located too close to water.\n", - "\n", - "Score 2.5735 [American_cliff_swallow] American cliff swallow\n", - " The cliff swallow or American cliff swallow (Petrochelidon pyrrhonota) is a member\n", - " of the passerine bird family Hirundinidae, the swallows and martins. The generic\n", - " name Petrochelidon is derived from the Ancient Greek petros meaning \"stone\" and\n", - " khelidon (χελιδών) \"swallow\", and the specific name pyrrhonota comes from purrhos\n", - " meaning \"flame-coloured\" and -notos \"-backed\". Cliff swallows are extremely social\n", - " songbirds that can be found in large nesting colonies reaching over 2,000 nests.\n", - " They are frequently seen flying overhead in large flocks during migration,\n", - " gracefully foraging over fields for flying insects or perching tightly together on a\n", - " wire preening under the sun. Cliff swallows build gourd-shaped nests made from mud\n", - " with small entrance holes. They build their nests tightly together, on top of one\n", - " another, under bridges or alongside mountain cliffs. Living in large populations,\n", - " these aerial insectivores use extensive vocalizations to communicate warnings or\n", - " food availability to the other individuals. The cliff swallow's average body length\n", - " is 13 cm (5.1 in), and they have short legs and small bills with relatively long\n", - " pointed wings. Adult cliff swallows have an overall dark brownish plumage covering\n", - " both their back and wings, and they have a characteristic white forehead, rich red-\n", - " coloured cheeks with a dark throat, basic white underparts and a buffy-coloured\n", - " rump. 
In good lighting conditions, their crowns and mantle feathers are iridescent.\n", - " The Northern population is slightly larger in body size and also differs in facial\n", - " markings from the Mexican population of cliff swallows, which have a chocolate-brown\n", - " patch on their foreheads. The male and female have identical plumage, therefore\n", - " sexing them must be done through palpation of the cloaca. During the breeding\n", - " season, the males will have a harder cloaca that is more pronounced because the\n", - " seminal vesicles are swollen. In addition, during incubation females will lose\n", - " feathers on their lower breast to create a warm patch for sitting on their eggs.\n", - " Cliff swallows are similar in body plumage colouring to the related barn swallow\n", - " species but lack the characteristic fork-shaped tail of the barn swallow prominent\n", - " during flight. The cliff swallows have a square-shaped tail. Juvenile cliff\n", - " swallows have an overall similar body plumage colouring to the adults, with paler\n", - " tones. The juveniles lack the iridescent adult plumages, and their foreheads and\n", - " throats appear speckled white. The juvenile cliff swallows' white forehead and\n", - " throat markings have high variance between unrelated individuals compared with those\n", - " from the same clutch. These distinctive white facial markings disappear during\n", - " maturity following their complex-basic moult pattern, because their pre-formative\n", - " plumage is different from the basic plumage. The pre-formative facial plumage has\n", - " been suggested as a possible way for parents nesting in large colonies to recognize\n", - " their chicks. The cliff swallow belongs to the largest order and dominant avian\n", - " group – Passeriformes. They are the perching birds, or the passerines. 
All the bird\n", - " species in this order have four toes, three pointing forward and one pointing\n", - " backwards (anisodactylous), that enable them to perch with ease. The sub-order that\n", - " the cliff swallow belongs to is Oscines (or Passeri), for the songbirds. The family\n", - " that encompasses approximately 90 species of swallows and martins, Hirundinidae,\n", - " includes birds that have small stream-lined bodies made for great agility and rapid\n", - " flight. Furthermore, those in the family Hirundinidae have short-flat bills for\n", - " their largely insectivorous diets, small feet because they spend much of their time\n", - " in flight and long wings for energy-efficient flight. There are five subspecies of\n", - " cliff swallow distinguished on the basis of plumage colour, body size, and\n", - " distribution – Petrochelidon pyrrhonota pyrrhonota, P. p. melanogaster, P. p.\n", - " tachina, P. p. hypopolia, P. p. ganieri. In addition, three core genera of hirundo\n", - " were established on the basis of molecular studies: Hirundo sensu stricto,\n", - " containing the barn swallow; Cecropis, containing the red-rumped swallow; and\n", - " Petrochelidon, containing the cliff swallow. The genetic tests deemed Petrochelidon\n", - " and Cecropis sister to each other and both closest to Delichon, the house martins.\n", - " Finally, the cave swallow was identified as the nearest living relative in North\n", - " America of the cliff swallow. The cave swallow has a similar plumage to the cliff\n", - " swallow; however, the former has a dark cap and pale throat, and also a much smaller\n", - " distribution in North America, most likely due to a decline in suitable cave sites.\n", - " As their name suggests, throughout history the cliff swallows concentrated their\n", - " nesting colonies along mountain cliffs, primarily by the western North American\n", - " coast. 
Today, with the development of highways, concrete bridges, and buildings …\n", - "\n", - "Score 1.7089 [Black-crowned_night-heron] Black-crowned night-heron\n", - " The black-crowned night heron (Nycticorax nycticorax) [or black-capped night\n", - " heron[citation needed]], commonly shortened to just night heron in Eurasia, is a\n", - " medium-sized heron found throughout a large part of the world, including parts of\n", - " Europe, Asia, and North and South America. In Australasia it is replaced by the\n", - " closely related Nankeen night heron (N. caledonicus), with which it has hybridised\n", - " in the area of contact. The black-crowned night heron was formally described by the\n", - " Swedish naturalist Carl Linnaeus in 1758 in the tenth edition of his Systema\n", - " Naturae. He placed it with herons, cranes and egrets in the genus Ardea and coined\n", - " the binomial name Ardea nycticorax, based on specimens from southern Europe. It is\n", - " now placed in the genus Nycticorax that was described in 1817 by the English\n", - " naturalist Thomas Forster for this species. The epithet nycticorax is from Ancient\n", - " Greek and combines nux, nuktos meaning \"night\" and korax meaning \"raven\". The word\n", - " was used by authors such as Aristotle and Hesychius of Miletus for a \"bird of ill\n", - " omen\", perhaps an owl. The word was used by the Swiss naturalist Conrad Gessner in\n", - " 1555 and then by subsequent authors for a black-crowned night heron. Four\n", - " subspecies are accepted: In the Falkland Islands, the bird is called quark, which\n", - " is an onomatopoeia similar to its name in many other languages, like qua-bird in\n", - " English, kwak in Dutch and West Frisian, kvakoš noční in Czech, квак in Ukrainian,\n", - " кваква in Russian, vạc in Vietnamese, kowak-malam in Indonesian, hoactli (\"wactli\")\n", - " in Nahuatl (cf. the scientific name of the New World subspecies), and waqwa in\n", - " Quechua. 
Adults have a black crown and back with the remainder of the body white or\n", - " grey, red eyes, and short yellow legs. They have pale grey wings and white under\n", - " parts. One to eight (mostly two to four) long slender white plumes, erected in\n", - " greeting and courtship displays, extend from the back of the head. The sexes are\n", - " similar in appearance although the males are slightly larger. Black-crowned night\n", - " herons do not fit the typical body form of the heron family. They are relatively\n", - " stocky with shorter bills, legs, and necks than their more familiar cousins, the\n", - " egrets and \"day\" herons. Their resting posture is normally somewhat hunched but when\n", - " hunting they extend their necks and look more like other wading birds. For a short\n", - " period during courtship at the start of the nesting season, the legs of adults turn\n", - " bright salmon-pink, and the bare skin around the eyes blue. The subspecies differ\n", - " little; nominate N. n. nycticorax and N. n. hoactli are particularly similar in\n", - " plumage (some authors have considered N. n. hoactli a synonym of the nominate), but\n", - " the latter is on average slightly larger. N. n obscurus is the most distinctive\n", - " subspecies, clearly darker than N. n. hoactli from further north in South America,\n", - " but N. n. falklandicus is intermediate, with both paler and darker individuals\n", - " occurring. Immature birds have dull grey-brown plumage on their heads, wings, and\n", - " backs, with numerous pale \"teardrop\" spots. Their underparts are paler and streaked\n", - " with brown. Second and third year birds attain plumages increasingly similar to\n", - " adults, but lacking the white head plumes. The young birds have orange eyes and\n", - " duller yellowish-green legs. They are very noisy birds in their nesting colonies,\n", - " with calls that are commonly transcribed as quok or woc. 
Measurements: The\n", - " breeding habitat is fresh and salt-water wetlands throughout much of the world. The\n", - " nominate subspecies N. n. nycticorax breeds in Europe, Asia and Africa, subspecies\n", - " N. n. hoactli in North and South America from Canada as far south as northern\n", - " Argentina and Chile, N. n. obscurus in southernmost South America, and N. n.\n", - " falklandicus in the Falkland Islands. Black-crowned night herons nest in colonies on\n", - " platforms of sticks in a group of trees, or on the ground in protected locations\n", - " such as islands or reedbeds. Three to eight eggs are laid. This heron is migratory\n", - " in the northern parts of its range, but is otherwise resident (even in the cold\n", - " Patagonia). European birds winter in Africa (with a few staying in southern Spain),\n", - " central and east Asian birds winter in southern Asia, and North American birds\n", - " winters in Mexico, the southern United States, Central America, and the West Indies.\n", - " A colony of the herons has regularly summered at the National Zoo in Washington,\n", - " D.C. for more than a century. The birds also prominently live year-round in the\n", - " shores around the San Francisco Bay, with the largest rookery in Oakland. Their ever\n", - " presence at Oakland's Lake Merritt and throughout the city's downtown area, as well\n", - " as their resilience to the urban environment and displacement efforts, have led to\n", - " them being named Oakland's official city bird. There are two archaeological\n", - " specimens of the black-crowned night heron in Great Britain. The oldest is from …\n", - "\n", - "Score 1.5022 [American_white_pelican] American white pelican\n", - " The American white pelican (Pelecanus erythrorhynchos) is a large aquatic soaring\n", - " bird from the order Pelecaniformes. It breeds in interior North America, moving\n", - " south and to the coasts, as far as Costa Rica, in winter. 
The American white\n", - " pelican was formally described in 1789 by the German naturalist Johann Friedrich\n", - " Gmelin in his revised and expanded edition of Carl Linnaeus's Systema Naturae. He\n", - " placed it with the other pelicans in the genus Pelecanus and coined the binomial\n", - " name Pelecanus erythrorhynchos. Gmelin based his description on the \"rough-billed\n", - " pelican\" that had been described in 1785 by the English ornithologist John Latham.\n", - " Latham had access to three specimens that had been brought to London from New York\n", - " and the Hudson Bay area of North America. The scientific name means \"red-billed\n", - " pelican\", from the Latin term for a pelican, Pelecanus, and erythrorhynchos, derived\n", - " from the Ancient Greek words erythros (ἐρυθρός, \"red\") + rhynchos (ῥύγχος, \"bill\").\n", - " The species is monotypic: no subspecies are recognised. The American white pelican\n", - " rivals the trumpeter swan, with a similar overall length, as one of the longest\n", - " birds native to North America. Both very large and plump, it has an overall length\n", - " of about 50–70 in (130–180 cm), courtesy of the huge beak which measures 11.3–15.2\n", - " in (290–390 mm) in males and 10.3–14.2 in (260–360 mm) in females. It has a wingspan\n", - " of about 95–120 in (240–300 cm). The species also has the second-largest average\n", - " wingspan of any North American bird, after the California condor. This large\n", - " wingspan allows the bird to easily use soaring flight for migration. Body weight can\n", - " range between 7.7 and 30 lb (3.5 and 13.6 kg), although typically these birds\n", - " average between 11 and 20 lb (5.0 and 9.1 kg). One mean body mass of 15.4 lb (7.0\n", - " kg) was reported. Another study found mean weights to be somewhat lower than\n", - " expected, with eleven males averaging 13.97 lb (6.34 kg) and six females averaging\n", - " 10.95 lb (4.97 kg). 
Among standard measurements, the wing chord measures 20–26.7 in\n", - " (51–68 cm) and the tarsus measures 3.9–5.4 in (9.9–13.7 cm) long. The plumage is\n", - " almost entirely bright white, except for the black primary and secondary remiges,\n", - " which are hardly visible except in flight. From early spring until after breeding\n", - " has finished in mid-late summer, the breast feathers have a yellowish hue. After\n", - " moulting into the eclipse plumage, the upper head often has a grey hue, as blackish\n", - " feathers grow between the small wispy white crest. The bill is huge and flat on the\n", - " top, with a large throat sac below, and, in the breeding season, is vivid orange in\n", - " color as is the bare skin around the eye and the feet. Iris coloration depends upon\n", - " age and season, ranging from bright white to hazel to blue-gray. In the breeding\n", - " season, both sexes grow a laterally flattened keratinous \"horn\" on the upper bill,\n", - " located about one-third the bill's length behind the tip. This is the only one of\n", - " the eight species of pelican to have a bill \"horn\". The horn is shed after the birds\n", - " have mated and laid their eggs. Outside the breeding season, the bare parts become\n", - " duller in color, with the naked facial skin yellow and the bill, pouch, and feet a\n", - " dull pink-orange. Apart from the difference in size, males and females look exactly\n", - " alike. Immature birds have light grey plumage with darker brownish nape and remiges.\n", - " Their bare parts are dull grey. Chicks are naked at first, then grow white down\n", - " feathers all over, before moulting to the immature plumage. American white pelicans\n", - " nest in colonies of several hundred pairs on islands in remote brackish and\n", - " freshwater lakes of inland North America. 
The most northerly nesting colony can be\n", - " found on islands in the rapids of the Slave River between Fort Fitzgerald, Alberta,\n", - " and Fort Smith, Northwest Territories. Several groups have been visiting the bird\n", - " sanctuary at Useless Bay in the state of Washington since 2015. About 10–20% of the\n", - " population uses Gunnison Island in the Great Basin's Great Salt Lake as a nesting\n", - " ground. The southernmost colonies are in southeastern Ontario and western Nevada.\n", - " They winter on the Pacific and Gulf of Mexico coasts from central California and\n", - " Florida south to Costa Rica, and along the Mississippi River at least as far north\n", - " as St. Paul, Minnesota. In winter quarters, they are rarely found on the open\n", - " seashore, preferring estuaries, bays, and lakes. They cross deserts and mountains\n", - " but avoid the open ocean on migration. But stray birds, often blown off course by\n", - " hurricanes, have been seen in the Caribbean. In Colombian territory, it was recorded\n", - " first on February 22, 1997, on the San Andrés Island, where they might have been\n", - " swept by Hurricane Marco which passed nearby in November 1996. Since then, there\n", - " have also been a few observations likely to pertain to this species on the Colombian\n", - " mainland, e.g. at Calamar. Wild American white pelicans may live for more than 16 …\n", - "\n", - "Score 1.0705 [American_black_vulture] American black vulture\n", - " The black vulture (Coragyps atratus), also known as the American black vulture,\n", - " Mexican vulture, zopilote, urubu, or gallinazo, is a bird in the New World vulture\n", - " family whose range extends from the southeastern United States to Peru, Central\n", - " Chile and Uruguay in South America. 
Although a common and widespread species, it has\n", - " a somewhat more restricted distribution than its compatriot, the turkey vulture,\n", - " which breeds well into Canada and all the way south to Tierra del Fuego. It is the\n", - " only extant member of the genus Coragyps, which is in the family Cathartidae.\n", - " Despite the similar name and appearance, this species is not closely related to the\n", - " Eurasian black vulture, an Old World vulture, of the family Accipitridae (which\n", - " includes raptors like the eagles, hawks, kites, and harriers). For ease of locating\n", - " animal corpses (their primary source of sustenance), black vultures tend to inhabit\n", - " relatively open areas with scattered trees, such as chaparral, in addition to\n", - " subtropical forested areas and parts of the Brazilian pantanal. With a wingspan of\n", - " 1.5 m (4.9 ft), the black vulture is an imposing bird, though relatively small for a\n", - " vulture, let alone a raptor. It has black plumage, a featherless, grayish-black head\n", - " and neck, and a short, hooked beak. These features are all evolutionary adaptations\n", - " to life as a scavenger; their black plumage stays visibly cleaner than that of a\n", - " lighter-colored bird, the bare head is designed for easily digging inside animal\n", - " carcasses, and the hooked beak is built for stripping the bodies clean of meat. The\n", - " absence of head feathers helps the birds stay clean and remain (more or less) free\n", - " of animal blood and bodily fluids, which could become problematic for the vultures\n", - " and attract parasites; most vultures are known to bathe after eating, provided there\n", - " is a water source. This water source can be natural or man-made, such as a stream or\n", - " a livestock water tank. 
The black vulture is a scavenger and feeds on carrion, but\n", - " will also eat eggs, small reptiles, or small newborn animals (livestock such as\n", - " cattle, or deer, rodents, rabbits, etc.), albeit very rarely. They will also\n", - " opportunistically prey on extremely weakened, sick, elderly, or otherwise vulnerable\n", - " animals. In areas populated by humans, it also scavenges at dumpster sites and\n", - " garbage dumps. It finds its meals by using its keen eyesight or following other (New\n", - " World) vultures, which all possess a keen sense of smell. Lacking a syrinx—the vocal\n", - " organ of birds—its only vocalizations are grunts or low hisses. It lays its eggs in\n", - " caves, in cliffside rock crevasses, dead and hollow trees, or, in the absence of\n", - " predators, on the bare ground, generally raising two chicks each year. The parents\n", - " feed their young by regurgitation from their crop, an additional digestive organ\n", - " unique to birds, used for storing excess food; their “infant formula”, of sorts, is\n", - " thus called “crop milk”. In the United States, the vulture receives legal protection\n", - " under the Migratory Bird Treaty Act of 1918. This vulture also appeared in Mayan\n", - " codices. The American naturalist William Bartram wrote of the black vulture in his\n", - " 1791 book Bartram's Travels, calling it Vultur atratus \"black vulture\" or \"carrion\n", - " crow\". Bartram's work has been rejected for nomenclatoríal purposes by the\n", - " International Commission on Zoological Nomenclature as the author did not\n", - " consistently use the system of binomial nomenclature. The German ornithologist\n", - " Johann Matthäus Bechstein formally described the species using the same name in 1793\n", - " in his translation of John Latham's A General Synopsis of Birds. The common name\n", - " \"vulture\" is derived from the Latin word vulturus, which means \"tearer\" and is a\n", - " reference to its feeding habits. 
The species name, ātrātus, means \"clothed in\n", - " black\", from the Latin āter 'dull black'. Vieillot defined the genus Catharista in\n", - " 1816, listing as its type C. urubu. French naturalist Emmanuel Le Maout placed in\n", - " its current genus Coragyps (as C. urubu) in 1853. Isidore Geoffroy Saint-Hilaire has\n", - " been listed as the author in the past, but he did not publish any official\n", - " description. The genus name means \"raven-vulture\", from a contraction of the Greek\n", - " corax/κόραξ and gyps/γὺψ for the respective birds. The American Ornithologists'\n", - " Union used the name Catharista atrata initially before adopting Vieillot's name\n", - " (Catharista urubu) in their third edition. By their fourth edition, they had adopted\n", - " the current name. The black vulture is basal (the earliest offshoot) to a lineage\n", - " that gave rise to the turkey vulture and greater and lesser yellow-headed vultures,\n", - " diverging around 12 million years ago. Martin Lichtenstein described C. a. foetens,\n", - " the Andean black vulture, in 1817, and Charles Lucien Bonaparte described C. a.\n", - " brasiliensis, from Central and South America, in 1850 on the basis of smaller size\n", - " and minor plumage differences. However, it has been established that the change …\n", - "\n", - "Proximity slop body:(\"nest colony\"~3)\n", - "\n", - "Score 2.8914 [American_cliff_swallow] American cliff swallow\n", - " The cliff swallow or American cliff swallow (Petrochelidon pyrrhonota) is a member\n", - " of the passerine bird family Hirundinidae, the swallows and martins. The generic\n", - " name Petrochelidon is derived from the Ancient Greek petros meaning \"stone\" and\n", - " khelidon (χελιδών) \"swallow\", and the specific name pyrrhonota comes from purrhos\n", - " meaning \"flame-coloured\" and -notos \"-backed\". 
Cliff swallows are extremely social\n", - " songbirds that can be found in large nesting colonies reaching over 2,000 nests.\n", - " They are frequently seen flying overhead in large flocks during migration,\n", - " gracefully foraging over fields for flying insects or perching tightly together on a\n", - " wire preening under the sun. Cliff swallows build gourd-shaped nests made from mud\n", - " with small entrance holes. They build their nests tightly together, on top of one\n", - " another, under bridges or alongside mountain cliffs. Living in large populations,\n", - " these aerial insectivores use extensive vocalizations to communicate warnings or\n", - " food availability to the other individuals. The cliff swallow's average body length\n", - " is 13 cm (5.1 in), and they have short legs and small bills with relatively long\n", - " pointed wings. Adult cliff swallows have an overall dark brownish plumage covering\n", - " both their back and wings, and they have a characteristic white forehead, rich red-\n", - " coloured cheeks with a dark throat, basic white underparts and a buffy-coloured\n", - " rump. In good lighting conditions, their crowns and mantle feathers are iridescent.\n", - " The Northern population is slightly larger in body size and also differs in facial\n", - " markings from the Mexican population of cliff swallows, which have a chocolate-brown\n", - " patch on their foreheads. The male and female have identical plumage, therefore\n", - " sexing them must be done through palpation of the cloaca. During the breeding\n", - " season, the males will have a harder cloaca that is more pronounced because the\n", - " seminal vesicles are swollen. 
In addition, during incubation females will lose\n", - " feathers on their lower breast to create a warm patch for sitting on their eggs.\n", - " Cliff swallows are similar in body plumage colouring to the related barn swallow\n", - " species but lack the characteristic fork-shaped tail of the barn swallow prominent\n", - " during flight. The cliff swallows have a square-shaped tail. Juvenile cliff\n", - " swallows have an overall similar body plumage colouring to the adults, with paler\n", - " tones. The juveniles lack the iridescent adult plumages, and their foreheads and\n", - " throats appear speckled white. The juvenile cliff swallows' white forehead and\n", - " throat markings have high variance between unrelated individuals compared with those\n", - " from the same clutch. These distinctive white facial markings disappear during\n", - " maturity following their complex-basic moult pattern, because their pre-formative\n", - " plumage is different from the basic plumage. The pre-formative facial plumage has\n", - " been suggested as a possible way for parents nesting in large colonies to recognize\n", - " their chicks. The cliff swallow belongs to the largest order and dominant avian\n", - " group – Passeriformes. They are the perching birds, or the passerines. All the bird\n", - " species in this order have four toes, three pointing forward and one pointing\n", - " backwards (anisodactylous), that enable them to perch with ease. The sub-order that\n", - " the cliff swallow belongs to is Oscines (or Passeri), for the songbirds. The family\n", - " that encompasses approximately 90 species of swallows and martins, Hirundinidae,\n", - " includes birds that have small stream-lined bodies made for great agility and rapid\n", - " flight. Furthermore, those in the family Hirundinidae have short-flat bills for\n", - " their largely insectivorous diets, small feet because they spend much of their time\n", - " in flight and long wings for energy-efficient flight. 
There are five subspecies of\n", - " cliff swallow distinguished on the basis of plumage colour, body size, and\n", - " distribution – Petrochelidon pyrrhonota pyrrhonota, P. p. melanogaster, P. p.\n", - " tachina, P. p. hypopolia, P. p. ganieri. In addition, three core genera of hirundo\n", - " were established on the basis of molecular studies: Hirundo sensu stricto,\n", - " containing the barn swallow; Cecropis, containing the red-rumped swallow; and\n", - " Petrochelidon, containing the cliff swallow. The genetic tests deemed Petrochelidon\n", - " and Cecropis sister to each other and both closest to Delichon, the house martins.\n", - " Finally, the cave swallow was identified as the nearest living relative in North\n", - " America of the cliff swallow. The cave swallow has a similar plumage to the cliff\n", - " swallow; however, the former has a dark cap and pale throat, and also a much smaller\n", - " distribution in North America, most likely due to a decline in suitable cave sites.\n", - " As their name suggests, throughout history the cliff swallows concentrated their\n", - " nesting colonies along mountain cliffs, primarily by the western North American\n", - " coast. Today, with the development of highways, concrete bridges, and buildings …\n", - "\n", - "Score 2.7644 [American_white_pelican] American white pelican\n", - " The American white pelican (Pelecanus erythrorhynchos) is a large aquatic soaring\n", - " bird from the order Pelecaniformes. It breeds in interior North America, moving\n", - " south and to the coasts, as far as Costa Rica, in winter. The American white\n", - " pelican was formally described in 1789 by the German naturalist Johann Friedrich\n", - " Gmelin in his revised and expanded edition of Carl Linnaeus's Systema Naturae. He\n", - " placed it with the other pelicans in the genus Pelecanus and coined the binomial\n", - " name Pelecanus erythrorhynchos. 
Gmelin based his description on the \"rough-billed\n", - " pelican\" that had been described in 1785 by the English ornithologist John Latham.\n", - " Latham had access to three specimens that had been brought to London from New York\n", - " and the Hudson Bay area of North America. The scientific name means \"red-billed\n", - " pelican\", from the Latin term for a pelican, Pelecanus, and erythrorhynchos, derived\n", - " from the Ancient Greek words erythros (ἐρυθρός, \"red\") + rhynchos (ῥύγχος, \"bill\").\n", - " The species is monotypic: no subspecies are recognised. The American white pelican\n", - " rivals the trumpeter swan, with a similar overall length, as one of the longest\n", - " birds native to North America. Both very large and plump, it has an overall length\n", - " of about 50–70 in (130–180 cm), courtesy of the huge beak which measures 11.3–15.2\n", - " in (290–390 mm) in males and 10.3–14.2 in (260–360 mm) in females. It has a wingspan\n", - " of about 95–120 in (240–300 cm). The species also has the second-largest average\n", - " wingspan of any North American bird, after the California condor. This large\n", - " wingspan allows the bird to easily use soaring flight for migration. Body weight can\n", - " range between 7.7 and 30 lb (3.5 and 13.6 kg), although typically these birds\n", - " average between 11 and 20 lb (5.0 and 9.1 kg). One mean body mass of 15.4 lb (7.0\n", - " kg) was reported. Another study found mean weights to be somewhat lower than\n", - " expected, with eleven males averaging 13.97 lb (6.34 kg) and six females averaging\n", - " 10.95 lb (4.97 kg). Among standard measurements, the wing chord measures 20–26.7 in\n", - " (51–68 cm) and the tarsus measures 3.9–5.4 in (9.9–13.7 cm) long. The plumage is\n", - " almost entirely bright white, except for the black primary and secondary remiges,\n", - " which are hardly visible except in flight. 
From early spring until after breeding\n", - " has finished in mid-late summer, the breast feathers have a yellowish hue. After\n", - " moulting into the eclipse plumage, the upper head often has a grey hue, as blackish\n", - " feathers grow between the small wispy white crest. The bill is huge and flat on the\n", - " top, with a large throat sac below, and, in the breeding season, is vivid orange in\n", - " color as is the bare skin around the eye and the feet. Iris coloration depends upon\n", - " age and season, ranging from bright white to hazel to blue-gray. In the breeding\n", - " season, both sexes grow a laterally flattened keratinous \"horn\" on the upper bill,\n", - " located about one-third the bill's length behind the tip. This is the only one of\n", - " the eight species of pelican to have a bill \"horn\". The horn is shed after the birds\n", - " have mated and laid their eggs. Outside the breeding season, the bare parts become\n", - " duller in color, with the naked facial skin yellow and the bill, pouch, and feet a\n", - " dull pink-orange. Apart from the difference in size, males and females look exactly\n", - " alike. Immature birds have light grey plumage with darker brownish nape and remiges.\n", - " Their bare parts are dull grey. Chicks are naked at first, then grow white down\n", - " feathers all over, before moulting to the immature plumage. American white pelicans\n", - " nest in colonies of several hundred pairs on islands in remote brackish and\n", - " freshwater lakes of inland North America. The most northerly nesting colony can be\n", - " found on islands in the rapids of the Slave River between Fort Fitzgerald, Alberta,\n", - " and Fort Smith, Northwest Territories. Several groups have been visiting the bird\n", - " sanctuary at Useless Bay in the state of Washington since 2015. About 10–20% of the\n", - " population uses Gunnison Island in the Great Basin's Great Salt Lake as a nesting\n", - " ground. 
The southernmost colonies are in southeastern Ontario and western Nevada.\n", - " They winter on the Pacific and Gulf of Mexico coasts from central California and\n", - " Florida south to Costa Rica, and along the Mississippi River at least as far north\n", - " as St. Paul, Minnesota. In winter quarters, they are rarely found on the open\n", - " seashore, preferring estuaries, bays, and lakes. They cross deserts and mountains\n", - " but avoid the open ocean on migration. But stray birds, often blown off course by\n", - " hurricanes, have been seen in the Caribbean. In Colombian territory, it was recorded\n", - " first on February 22, 1997, on the San Andrés Island, where they might have been\n", - " swept by Hurricane Marco which passed nearby in November 1996. Since then, there\n", - " have also been a few observations likely to pertain to this species on the Colombian\n", - " mainland, e.g. at Calamar. Wild American white pelicans may live for more than 16 …\n", - "\n", - "Score 2.7594 [Ashy_storm-petrel] Ashy storm-petrel\n", - " The ashy storm petrel was first described by American ornithologist Elliott Coues in\n", - " 1864. Both its common and scientific name, homochroa, \"uniformly colored\", from\n", - " Ancient Greek (h)omoia (όμοια), \"alike\" + \"chroma\" (χρώμα) \"color\", come from its\n", - " coloration. It was formerly defined in the genus Oceanodroma before that genus was\n", - " synonymized with Hydrobates. This is a small, uniformly sooty-brown storm petrel\n", - " with a forked tail, closely resembling the black storm petrel, but it is smaller and\n", - " has a more fluttering style of flight, with the upstroke only becoming horizontal to\n", - " the body before beginning the downstroke (other storm petrels in its range have a\n", - " higher upstroke). 
It is a gregarious bird at sea, feeding nocturnally on\n", - " cephalopods, fish (particularly the deep-sea myctophids, which rise to the sea's\n", - " surface at night) and euphausiid krill such as Thysanoessa spinifera, which also\n", - " swarm at the surface. They also attend fishing vessels for the fish oils released\n", - " when the nets are pulled. Ashy storm petrels nest in rock burrows on offshore\n", - " islands, returning to the nests at night. The species has a long breeding cycle,\n", - " laying eggs in May and fledging in October, although timing varies greatly, more so\n", - " than in most other storm petrels; some pairs may have a chick that is half grown\n", - " when other pairs are still laying. Like in many other seabirds, pairs show both mate\n", - " and site fidelity, mating in the same pair with the same mate for many years and\n", - " nesting at the same burrow, despite the pairs spending their lives out of the\n", - " breeding season separate from each other, and despite the fact that many individuals\n", - " might seem to compete for burrows at the nesting colonies. A change in mate is\n", - " usually associated with a change in nesting site. The ashy storm petrel is a long-\n", - " lived bird; a banded individual has lived at least 31 years. Ashy storm petrels\n", - " breed on 17 islands in the northeast Pacific, principally off the coast of\n", - " California, but including a few sites off the coast of northwestern Mexico. Half the\n", - " world's population nests on the Farallon Islands near San Francisco. Other breeding\n", - " islands include the eight Channel Islands of California and a small population on\n", - " Mexico's Coronados. Bat Cave, on the north side of Santa Cruz Island in Southern\n", - " California, has the largest nesting colony for the ashy storm petrel in the world,\n", - " with over 100 nests. 
Outside of the breeding season, it is believed to be more\n", - " widely distributed, foraging on the California Current, but it undertakes no large\n", - " migration and does not range as far as other species of storm petrels. In the early\n", - " fall, large flocks can be seen in Monterey Bay. The birds do not range inland any\n", - " significant distance except when storm-blown; for example, a sighting in San Mateo\n", - " County, California was considered \"unusual\" by an experienced naturalist. The world\n", - " population is estimated to be around 10,000 birds, 8,000 of them breeders, with the\n", - " Farallon population having declined by one-third between 1972 and 1992. The ashy\n", - " storm petrel is designated as a species of conservation concern in California. It is\n", - " threatened by western gull and burrowing owl predation, illumination from fishing\n", - " boats, introduced predators such as rats and feral cats, and pollution. Most of the\n", - " islands where it breeds are covered by some degree of protection. Global warming\n", - " could have a profound impact on ashy storm petrels. Future changes to coastal\n", - " California waters due to global warming could result in warmer, less productive\n", - " waters, which would mean less food would be available for the petrels. In addition,\n", - " ocean acidification may result in a decline of crustacean prey species due to the\n", - " effects that excess CO2 will have on the animals' shells. Sea-level rise will also\n", - " threaten certain nesting sites that would be located too close to water.\n", - "\n", - "Score 2.3652 [Black-crowned_night-heron] Black-crowned night-heron\n", - " The black-crowned night heron (Nycticorax nycticorax) [or black-capped night\n", - " heron[citation needed]], commonly shortened to just night heron in Eurasia, is a\n", - " medium-sized heron found throughout a large part of the world, including parts of\n", - " Europe, Asia, and North and South America. 
In Australasia it is replaced by the\n", - " closely related Nankeen night heron (N. caledonicus), with which it has hybridised\n", - " in the area of contact. The black-crowned night heron was formally described by the\n", - " Swedish naturalist Carl Linnaeus in 1758 in the tenth edition of his Systema\n", - " Naturae. He placed it with herons, cranes and egrets in the genus Ardea and coined\n", - " the binomial name Ardea nycticorax, based on specimens from southern Europe. It is\n", - " now placed in the genus Nycticorax that was described in 1817 by the English\n", - " naturalist Thomas Forster for this species. The epithet nycticorax is from Ancient\n", - " Greek and combines nux, nuktos meaning \"night\" and korax meaning \"raven\". The word\n", - " was used by authors such as Aristotle and Hesychius of Miletus for a \"bird of ill\n", - " omen\", perhaps an owl. The word was used by the Swiss naturalist Conrad Gessner in\n", - " 1555 and then by subsequent authors for a black-crowned night heron. Four\n", - " subspecies are accepted: In the Falkland Islands, the bird is called quark, which\n", - " is an onomatopoeia similar to its name in many other languages, like qua-bird in\n", - " English, kwak in Dutch and West Frisian, kvakoš noční in Czech, квак in Ukrainian,\n", - " кваква in Russian, vạc in Vietnamese, kowak-malam in Indonesian, hoactli (\"wactli\")\n", - " in Nahuatl (cf. the scientific name of the New World subspecies), and waqwa in\n", - " Quechua. Adults have a black crown and back with the remainder of the body white or\n", - " grey, red eyes, and short yellow legs. They have pale grey wings and white under\n", - " parts. One to eight (mostly two to four) long slender white plumes, erected in\n", - " greeting and courtship displays, extend from the back of the head. The sexes are\n", - " similar in appearance although the males are slightly larger. Black-crowned night\n", - " herons do not fit the typical body form of the heron family. 
They are relatively\n", - " stocky with shorter bills, legs, and necks than their more familiar cousins, the\n", - " egrets and \"day\" herons. Their resting posture is normally somewhat hunched but when\n", - " hunting they extend their necks and look more like other wading birds. For a short\n", - " period during courtship at the start of the nesting season, the legs of adults turn\n", - " bright salmon-pink, and the bare skin around the eyes blue. The subspecies differ\n", - " little; nominate N. n. nycticorax and N. n. hoactli are particularly similar in\n", - " plumage (some authors have considered N. n. hoactli a synonym of the nominate), but\n", - " the latter is on average slightly larger. N. n obscurus is the most distinctive\n", - " subspecies, clearly darker than N. n. hoactli from further north in South America,\n", - " but N. n. falklandicus is intermediate, with both paler and darker individuals\n", - " occurring. Immature birds have dull grey-brown plumage on their heads, wings, and\n", - " backs, with numerous pale \"teardrop\" spots. Their underparts are paler and streaked\n", - " with brown. Second and third year birds attain plumages increasingly similar to\n", - " adults, but lacking the white head plumes. The young birds have orange eyes and\n", - " duller yellowish-green legs. They are very noisy birds in their nesting colonies,\n", - " with calls that are commonly transcribed as quok or woc. Measurements: The\n", - " breeding habitat is fresh and salt-water wetlands throughout much of the world. The\n", - " nominate subspecies N. n. nycticorax breeds in Europe, Asia and Africa, subspecies\n", - " N. n. hoactli in North and South America from Canada as far south as northern\n", - " Argentina and Chile, N. n. obscurus in southernmost South America, and N. n.\n", - " falklandicus in the Falkland Islands. 
Black-crowned night herons nest in colonies on\n", - " platforms of sticks in a group of trees, or on the ground in protected locations\n", - " such as islands or reedbeds. Three to eight eggs are laid. This heron is migratory\n", - " in the northern parts of its range, but is otherwise resident (even in the cold\n", - " Patagonia). European birds winter in Africa (with a few staying in southern Spain),\n", - " central and east Asian birds winter in southern Asia, and North American birds\n", - " winters in Mexico, the southern United States, Central America, and the West Indies.\n", - " A colony of the herons has regularly summered at the National Zoo in Washington,\n", - " D.C. for more than a century. The birds also prominently live year-round in the\n", - " shores around the San Francisco Bay, with the largest rookery in Oakland. Their ever\n", - " presence at Oakland's Lake Merritt and throughout the city's downtown area, as well\n", - " as their resilience to the urban environment and displacement efforts, have led to\n", - " them being named Oakland's official city bird. There are two archaeological\n", - " specimens of the black-crowned night heron in Great Britain. The oldest is from …\n", - "\n", - "Score 2.2510 [Antillean_palm-swift] Antillean palm-swift\n", - " The Antillean palm swift has two subspecies: the nominate T. p. phoenicobia Gosse,\n", - " 1847, and T. p. iradii (Lembeye, 1850). The Antillean palm swift is 9 to 11 cm (3.5\n", - " to 4.3 in) long and weighs about 9 to 11 g (0.32 to 0.39 oz). It has long narrow\n", - " wings and a medium length forked tail. The sexes are alike. Adults of the nominate\n", - " subspecies have a dark sooty brown crown and nape. Their back, a narrow center strip\n", - " of the rump, uppertail coverts, and tail are sooty black to black. The sides of\n", - " their rump are white. Their wings are sooty blackish with pale edges on the flight\n", - " feathers. Most of their face is grayish brown. 
Their underparts are mostly dull\n", - " white; their flanks, a narrow band across the breast, and undertail coverts are dark\n", - " sooty brown. Immatures are similar to adults, but their underparts are an even\n", - " duller white, and the flanks and undertail coverts a paler sooty brown. The\n", - " subspecies T. p. iradii is somewhat larger than the nominate, and has a more deeply\n", - " forked tail. Its back is more sooty than black, its face has more extensive grayish\n", - " brown, and its flanks are a paler sooty brown. The nominate subspecies of Antillean\n", - " palm swift is found on Jamaica, Hispaniola, and some small islands off the latter's\n", - " coast. T. p. iradii is found on mainland Cuba and Isla de la Juventud. The species\n", - " has been documented as a vagrant in Florida and there are sight records from Puerto\n", - " Rico, the Cayman Islands, Inagua in The Bahamas, and the Turks and Caicos Islands.\n", - " The Antillean palm swift is seen over dry grassy areas that have patches of palms,\n", - " scrublands, forest, and suburban and urban areas. In elevation, it ranges as high as\n", - " 1,700 m (5,600 ft) on Hispaniola and 1,200 m (3,900 ft) on Jamaica. The Antillean\n", - " palm swift is a year-round resident throughout its range, though individuals have\n", - " wandered outside it. Like all swifts, the Antillean palm swift is an aerial\n", - " insectivore. It forages low to the ground, usually over vegetation, and usually in\n", - " small flocks of its species. It sometimes forages with swallows. Details of its diet\n", - " are lacking. The Antillean palm swift's breeding season on Cuba is May to July, and\n", - " on Hispaniola, from March to May. It makes a hanging pouch nest of plant fibers and\n", - " feathers glued together with saliva and hung on the outside of a dead drooping palm\n", - " frond. It nests in small colonies. The clutch size is two to five; both parents\n", - " incubate the eggs and care for nestlings. 
The Antillean palm swift's flight call is\n", - " described as \"noisy...an almost constant, weak, twittering, tooee-tooee\". The IUCN\n", - " has assessed the Antillean palm swift as being of Least Concern. It has a large\n", - " range, and though its population size is not known it is believed to be stable. No\n", - " immediate threats have been identified. \"This swift is adaptable to living around\n", - " human habitations, and the planting of decorative palms provides nest sites for\n", - " these birds.\"\n", - "\n", - "Gained by relaxing to slop~3 (not in strict results):\n", - " [Antillean_palm-swift] Antillean palm-swift\n" - ] - } - ], + "outputs": [], "source": [ "# ── Strict phrase vs. proximity phrase ──────────────────────────────────────\n", "response_strict = idx.documents.search(\n", @@ -2064,7 +1199,7 @@ "show_results(response_slop, \"body\", 60)\n", "\n", "strict_ids = {d._id for d in response_strict.matches}\n", - "slop_ids = {d._id for d in response_slop.matches}\n", + "slop_ids = {d._id for d in response_slop.matches}\n", "gained = slop_ids - strict_ids\n", "if gained:\n", " print(\"Gained by relaxing to slop~3 (not in strict results):\")\n", @@ -2077,1228 +1212,270 @@ }, { "cell_type": "markdown", - "id": "step10-header", + "id": "35", "metadata": {}, "source": [ - "## 11. Boosting: influencing ranking by term importance\n", - "\n", - "`^N` multiplies a term’s BM25 score contribution by *N* **without** making it\n", - "required. Documents that lack the boosted term can still appear if they score\n", - "well on the other terms — boosting shapes the ranking, it doesn’t filter.\n", - "\n", - "```\n", - "body:(foraging^3 feeding diet)\n", - "```\n", - "\n", - "- `foraging^3` — three times the weight of an unboosted term\n", - "- `feeding`, `diet` — unboosted; contribute normally\n", - "\n", - "Phrases can be boosted too: `body:(\"aerial foraging\"^2 insects)` boosts the\n", - "exact adjacent phrase rather than a single token." 
- ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "step10-code", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Boosted body:(foraging^3 feeding diet)\n", - "\n", - "Score 5.8757 [American_white_ibis] American white ibis\n", - " Their diet consists primarily of small aquatic prey, such as insects and small\n", - " fishes. Crayfish are its preferred food in most regions, but it can adjust its diet\n", - " according to the habitat and prey abundance. Its main foraging behavior is probing\n", - " with its beak at the bottom of shallow water to feel for and capture its prey. It …\n", - "\n", - "Score 5.3150 [Black-and-white_warbler] Black-and-white warbler\n", - " The black-and-white warbler (Mniotilta varia) is a species of New World warbler, and\n", - " the only member of its genus, Mniotilta. It breeds in northern and eastern North\n", - " America and winters in Florida, Central America, and the West Indies down to Peru.\n", - " This species is a very rare vagrant to western Europe. Relative to other New …\n", - "\n", - "Score 5.1158 [Arctic_loon] Arctic loon\n", - " The black-throated loon (Gavia arctica), also known as the Arctic loon and the\n", - " black-throated diver, is a migratory aquatic bird found in the northern hemisphere,\n", - " primarily breeding in freshwater lakes in northern Europe and Asia. It winters along\n", - " sheltered, ice-free coasts of the north-east Atlantic Ocean and the eastern and …\n", - "\n", - "Score 4.9510 [Azure-crowned_hummingbird] Azure-crowned hummingbird\n", - " The azure-crowned hummingbird (Saucerottia cyanocephala) is a species of hummingbird\n", - " in the \"emeralds\", tribe Trochilini of subfamily Trochilinae. It is found in Belize,\n", - " El Salvador, Guatemala, Honduras, Mexico, and Nicaragua. 
The azure-crowned\n", - " hummingbird was originally described as Ornismya cyanocephalus and later moved …\n", - "\n", - "Score 4.8732 [Black-banded_woodcreeper] Black-banded woodcreeper\n", - " The black-banded woodcreeper's taxonomy is unsettled. The International\n", - " Ornithological Committee (IOC) and the Clements taxonomy recognize these 10\n", - " subspecies. Clements arranges them in three groups. \"Spot-throated\" group \"Black-\n", - " banded\" group \"Pale-billed\" group BirdLife International's Handbook of the Birds …\n", - "\n", - "Flat (no boost) body:(foraging feeding diet)\n", - "\n", - "Score 3.3993 [American_white_ibis] American white ibis\n", - " Their diet consists primarily of small aquatic prey, such as insects and small\n", - " fishes. Crayfish are its preferred food in most regions, but it can adjust its diet\n", - " according to the habitat and prey abundance. Its main foraging behavior is probing\n", - " with its beak at the bottom of shallow water to feel for and capture its prey. It …\n", - "\n", - "Score 3.0651 [Arctic_loon] Arctic loon\n", - " The black-throated loon (Gavia arctica), also known as the Arctic loon and the\n", - " black-throated diver, is a migratory aquatic bird found in the northern hemisphere,\n", - " primarily breeding in freshwater lakes in northern Europe and Asia. It winters along\n", - " sheltered, ice-free coasts of the north-east Atlantic Ocean and the eastern and …\n", - "\n", - "Score 2.9660 [Black-and-white_warbler] Black-and-white warbler\n", - " The black-and-white warbler (Mniotilta varia) is a species of New World warbler, and\n", - " the only member of its genus, Mniotilta. It breeds in northern and eastern North\n", - " America and winters in Florida, Central America, and the West Indies down to Peru.\n", - " This species is a very rare vagrant to western Europe. 
Relative to other New …\n", - "\n", - "Score 2.9043 [Bahama_mockingbird] Bahama mockingbird\n", - " The Bahama mockingbird has two subspecies, the nominate Mimus gundlachii gundlachii\n", - " and M. g. hillii. Its specific epithet honors Juan Gundlach. The Bahama mockingbird\n", - " is 28 cm (11 in) long and weighs between 57 and 85 g (2.0 and 3.0 oz) with an\n", - " average of 66.8 g (2.36 oz). Adults of the nominate subspecies have a mottled face …\n", - "\n", - "Score 2.8421 [Azure-crowned_hummingbird] Azure-crowned hummingbird\n", - " The azure-crowned hummingbird (Saucerottia cyanocephala) is a species of hummingbird\n", - " in the \"emeralds\", tribe Trochilini of subfamily Trochilinae. It is found in Belize,\n", - " El Salvador, Guatemala, Honduras, Mexico, and Nicaragua. The azure-crowned\n", - " hummingbird was originally described as Ornismya cyanocephalus and later moved …\n", - "\n" - ] - } - ], - "source": [ - "# ── Boosting: foraging^3 outweighs feeding / diet ───────────────────────────\n", - "response_boosted = idx.documents.search(\n", - " namespace=NAMESPACE,\n", - " top_k=5,\n", - " score_by=[{\"type\": \"query_string\", \"query\": \"body:(foraging^3 feeding diet)\"}],\n", - " include_fields=[\"bird_name\", \"body\"],\n", - ")\n", - "\n", - "response_flat = idx.documents.search(\n", - " namespace=NAMESPACE,\n", - " top_k=5,\n", - " score_by=[{\"type\": \"query_string\", \"query\": \"body:(foraging feeding diet)\"}],\n", - " include_fields=[\"bird_name\", \"body\"],\n", - ")\n", - "\n", - "print(\"Boosted body:(foraging^3 feeding diet)\\n\")\n", - "show_results(response_boosted, \"body\", 4)\n", - "\n", - "print(\"Flat (no boost) body:(foraging feeding diet)\\n\")\n", - "show_results(response_flat, \"body\", 4)" - ] - }, - { - "cell_type": "markdown", - "id": "step11-header", - "metadata": {}, - "source": [ - "## 12. 
Cross-field `query_string`: multiple fields in one clause\n", - "\n", - "A single `query_string` clause can reference **multiple fields** by combining\n", - "field-scoped sub-clauses with boolean operators.\n", - "\n", - "```\n", - "bird_name:(hawk) AND body:(hunting prey)\n", - "```\n", - "\n", - "The top-level `AND` requires **both** sub-clauses to match: a bird must have\n", - "“hawk” in its name **and** “hunting” or “prey” in its body.\n", - "\n", - "Contrast with the multi-clause `score_by` approach from step 4: that blends\n", - "scores across separate clauses (any clause can match). This enforces a\n", - "**cross-field boolean constraint** in a single expression." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "step11-code", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "query_string bird_name:(hawk) AND body:(hunting prey)\n", - "\n", - "Score 7.6021 [Bicolored_hawk] Bicolored hawk\n", - " The bicolored hawk was formally described in 1817 by the French ornithologist Louis\n", - " Vieillot under the binomial name Sparvius bicolor. He specified Cayenne in French\n", - " Guiana as the type locality. The bicolored hawk was formerly placed in the large and\n", - " diverse genus Accipiter. In 2024 a comprehensive molecular phylogenetic study of the\n", - " Accipitridae confirmed earlier work that had shown that the genus was polyphyletic.\n", - " To resolve the non-monophyly, Accipiter was divided into six genera. The genus Astur\n", - " was resurrected to accommodate 9 species, including the bicolored hawk, that had\n", - " previously been placed in Accipiter. The resurrected genus had been introduced in\n", - " 1799 by the French naturalist Bernard Germain de Lacépède. The genus name is from\n", - " Latin astur, asturis meaning \"hawk\". 
The bicolored hawk is also closely related …\n", - "\n", - "Score 7.0616 [Black-and-white_hawk-eagle] Black-and-white hawk-eagle\n", - " The black-and-white hawk-eagle (Spizaetus melanoleucus, formerly Spizastur\n", - " melanoleucus) is a bird of prey species in the eagle and hawk family (Accipitridae).\n", - " It is found throughout a large part of tropical America, from southern Mexico to\n", - " northern Argentina. As its name suggests, this is a black and white eagle,\n", - " resembling the small typical eagles sometimes separated in \"Hieraaetus\". It is some\n", - " 20–24 in (51–61 cm) long overall and weighs about 30 oz (850 g). The head, neck and\n", - " body are white; a small crest forms a black spot on top of the head, and the area\n", - " around the eyes, particularly towards the bill, is also black. The wings are black\n", - " with a noticeable white leading edge, and the bird has a brownish tail barred black-\n", - " dark grey and with white tip. The iris is orange in adults and greyish in …\n", - "\n", - "Score 5.0625 [Black-collared_hawk] Black-collared hawk\n", - " The adult black-collared hawk has a more or less white head, tinged with buff, and\n", - " with black shaft streaks on the crown. The body, above and below, and the mantle are\n", - " bright cinnamon-rufous, paler on the chest. There is a black crescent on the upper\n", - " breast. The back has scattered black shaft stripes; the flight and tail feathers are\n", - " black with the base of the tail barred with rufous. The eyes are bright reddish\n", - " brown, the cere and bill black, and the legs bluish white. Immatures are similar,\n", - " but blotched with black, including on the crown, and the rufous barring on the tail\n", - " is more extensive. The pale area on the chest is also more clearly marked. The upper\n", - " surface of the wings is barred, and the eyes are brown. 
The nest is usually placed\n", - " in a large tree, frequently near water, but sometimes in shade trees in coffee …\n", - "\n" - ] - } - ], - "source": [ - "# ── Cross-field AND: hawk in name + hunting/prey in body ────────────────────\n", - "response_cross = idx.documents.search(\n", - " namespace=NAMESPACE,\n", - " top_k=5,\n", - " score_by=[{\n", - " \"type\": \"query_string\",\n", - " \"query\": 'bird_name:(hawk) AND body:(hunting prey)',\n", - " }],\n", - " include_fields=[\"bird_name\", \"body\"],\n", - ")\n", - "\n", - "print('query_string bird_name:(hawk) AND body:(hunting prey)\\n')\n", - "show_results(response_cross, \"body\")" - ] - }, - { - "cell_type": "markdown", - "id": "step12-header", - "metadata": {}, - "source": [ - "## 13. Combining everything: a production-grade query\n", - "\n", - "Each of the preceding steps introduced one concept. This query composes them\n", - "all into a single expression:\n", - "\n", - "```\n", - "bird_name:(hawk^2 OR eagle) AND\n", - "body:((\"dense vegetation\" OR \"forest canopy\") AND hunt -fish)\n", - "```\n", - "\n", - "Clause by clause:\n", - "\n", - "| Clause | Concept |\n", - "|---|---|\n", - "| `bird_name:(hawk^2 OR eagle)` | boost (step 10) + token OR on `bird_name` (step 8) |\n", - "| `AND` | cross-field boolean (step 11) |\n", - "| `\"dense vegetation\" OR \"forest canopy\"` | exact phrases targeting forest-interior hunters (step 8) |\n", - "| `AND hunt` | require term — AND operator (step 6) |\n", - "| `-fish` | exclude term — NOT operator (step 7) |\n", - "\n", - "The intent is **forest-interior hawks and eagles that actively hunt, excluding\n", - "piscivorous species**. 
Two exclusions work at different levels:\n", - "\n", - "- **Name-level:** using `eagle` instead of `falcon` keeps open-country falcons\n", - " like the Aplomado out of the candidate set entirely.\n", - "- **Body-level:** `-fish` hard-filters the Black-collared hawk, whose diet\n", - " is described as \"mainly composed of fish\".\n", - "\n", - "Expected top results: the Bicolored hawk (\"flying through dense vegetation to\n", - "ambush unsuspecting prey\") and the Black-and-white hawk-eagle (\"nests in the\n", - "forest canopy\").\n", - "\n", - "> **Stemming:** `body` has stemming on, so `hunt` also matches *hunting* and\n", - "> *hunted* — no extra syntax needed.\n", - ">\n", - "> **Operator precedence:** AND binds tighter than OR. Use explicit\n", - "> parentheses when mixing to avoid surprises." - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "step12-code", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "query_string bird_name:(hawk^2 OR eagle) AND body:((\"dense vegetation\" OR \"forest canopy\") AND hunt -fish)\n", - "\n", - "Score 14.2552 [Black-and-white_hawk-eagle] Black-and-white hawk-eagle\n", - " The black-and-white hawk-eagle (Spizaetus melanoleucus, formerly Spizastur\n", - " melanoleucus) is a bird of prey species in the eagle and hawk family (Accipitridae).\n", - " It is found throughout a large part of tropical America, from southern Mexico to\n", - " northern Argentina. As its name suggests, this is a black and white eagle,\n", - " resembling the small typical eagles sometimes separated in \"Hieraaetus\". It is some\n", - " 20–24 in (51–61 cm) long overall and weighs about 30 oz (850 g). The head, neck and\n", - " body are white; a small crest forms a black spot on top of the head, and the area\n", - " around the eyes, particularly towards the bill, is also black. 
The wings are black\n", - " with a noticeable white leading edge, and the bird has a brownish tail barred black-\n", - " dark grey and with white tip. The iris is orange in adults and greyish in …\n", - "\n", - "Score 12.9971 [Bicolored_hawk] Bicolored hawk\n", - " The bicolored hawk was formally described in 1817 by the French ornithologist Louis\n", - " Vieillot under the binomial name Sparvius bicolor. He specified Cayenne in French\n", - " Guiana as the type locality. The bicolored hawk was formerly placed in the large and\n", - " diverse genus Accipiter. In 2024 a comprehensive molecular phylogenetic study of the\n", - " Accipitridae confirmed earlier work that had shown that the genus was polyphyletic.\n", - " To resolve the non-monophyly, Accipiter was divided into six genera. The genus Astur\n", - " was resurrected to accommodate 9 species, including the bicolored hawk, that had\n", - " previously been placed in Accipiter. The resurrected genus had been introduced in\n", - " 1799 by the French naturalist Bernard Germain de Lacépède. The genus name is from\n", - " Latin astur, asturis meaning \"hawk\". The bicolored hawk is also closely related …\n", - "\n" - ] - } - ], - "source": [ - "# ── Composed query: forest hawks/eagles that hunt, no fish-eating specialists ─\n", - "COMPOSED = (\n", - " 'bird_name:(hawk^2 OR eagle) AND '\n", - " 'body:((\"dense vegetation\" OR \"forest canopy\") AND hunt -fish)'\n", - ")\n", - "\n", - "response_composed = idx.documents.search(\n", - " namespace=NAMESPACE,\n", - " top_k=5,\n", - " score_by=[{\"type\": \"query_string\", \"query\": COMPOSED}],\n", - " include_fields=[\"bird_name\", \"body\"],\n", - ")\n", - "\n", - "print(f'query_string {COMPOSED}\\n')\n", - "show_results(response_composed, \"body\")" - ] - }, - { - "cell_type": "markdown", - "id": "b2afeba3", - "metadata": {}, - "source": [ - "## 14. Regex: token-level pattern matching\n", - "\n", - "Lucene `/pattern/` regex syntax works inside `query_string` clauses. 
Wrap the\n", - "pattern in forward slashes inside the field clause:\n", - "\n", - "```\n", - "bird_name:/.*bird/\n", - "```\n", - "\n", - "**Key constraint:** the pattern matches the **entire indexed token**, not the\n", - "full field string. `bird_name:/.*bird/` asks — for each token in `bird_name`,\n", - "does it match `.*bird` end-to-end?\n", - "\n", - "This unlocks suffix matching that no other query type supports. A simple\n", - "token search for `bird_name:(bird)` only finds documents where \"bird\" is a\n", - "standalone token — no bird in this corpus has that. Regex finds every compound\n", - "word that ends in \"bird\": `hummingbird`, `mockingbird`, `puffbird`, etc.\n", - "\n", - "> **Stemming note:** `bird_name` has stemming **off**, so regex operates on\n", - "> the raw lowercased tokens — no morphological expansion." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "9ea6c059", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "token bird_name:(bird)\n", - "\n", - "(no matches)\n", - "regex bird_name:/.*bird/\n", - "\n", - "Score 1.0000 [Amazilia_hummingbird] Amazilia hummingbird\n", - " Amazilia hummingbird\n", - "\n", - "Score 1.0000 [Amethyst-throated_hummingbird] Amethyst-throated hummingbird\n", - " Amethyst-throated hummingbird\n", - "\n", - "Score 1.0000 [Anna%27s_hummingbird] Anna%27s hummingbird\n", - " Anna%27s hummingbird\n", - "\n", - "Score 1.0000 [Antillean_crested_hummingbird] Antillean crested hummingbird\n", - " Antillean crested hummingbird\n", - "\n", - "Score 1.0000 [Allen%27s_hummingbird] Allen%27s hummingbird\n", - " Allen%27s hummingbird\n", - "\n", - "Found by regex but not by token search (compound tokens ending in 'bird'):\n", - " [Amazilia_hummingbird] Amazilia hummingbird\n", - " [Amethyst-throated_hummingbird] Amethyst-throated hummingbird\n", - " [Anna%27s_hummingbird] Anna%27s hummingbird\n", - " [Antillean_crested_hummingbird] Antillean crested 
hummingbird\n", - " [Allen%27s_hummingbird] Allen%27s hummingbird\n" - ] - } - ], - "source": [ - "# ── Token search: \"bird\" as a standalone token ───────────────────────────────\n", - "# No bird in this corpus has \"bird\" as a separate token in its name —\n", - "# hummingbird, mockingbird, puffbird, etc. are all single compound tokens.\n", - "response_token = idx.documents.search(\n", - " namespace=NAMESPACE,\n", - " top_k=5,\n", - " score_by=[{\"type\": \"query_string\", \"query\": \"bird_name:(bird)\"}],\n", - " include_fields=[\"bird_name\"],\n", - ")\n", - "\n", - "print('token bird_name:(bird)\\n')\n", - "show_results(response_token, \"bird_name\")\n", - "\n", - "# ── Regex suffix: every token ending in \"bird\" ────────────────────────────────\n", - "# The pattern /.*bird/ matches the full token end-to-end, so it catches any\n", - "# compound word whose final characters are \"bird\".\n", - "response_regex = idx.documents.search(\n", - " namespace=NAMESPACE,\n", - " top_k=5,\n", - " score_by=[{\"type\": \"query_string\", \"query\": \"bird_name:/.*bird/\"}],\n", - " include_fields=[\"bird_name\"],\n", - ")\n", - "\n", - "print('regex bird_name:/.*bird/\\n')\n", - "show_results(response_regex, \"bird_name\")\n", - "\n", - "token_ids = {d._id for d in response_token.matches}\n", - "regex_ids = {d._id for d in response_regex.matches}\n", - "gained = regex_ids - token_ids\n", - "if gained:\n", - " print(\"Found by regex but not by token search (compound tokens ending in 'bird'):\")\n", - " for doc in response_regex.matches:\n", - " if doc._id in gained:\n", - " print(f\" [{doc._id}] {doc.get('bird_name')}\")" - ] - }, - { - "cell_type": "markdown", - "id": "83d50f92", - "metadata": {}, - "source": [ - "## 15. 
Phrase prefix: autocomplete\n", - "\n", - "Appending `*` after a quoted phrase treats the **last token as a prefix**.\n", - "All preceding tokens must match exactly and adjacently; only the final token\n", - "is expanded.\n", - "\n", - "`body:(\"tropcial fo\"*)` matches:\n", - "- \"tropical forest\"\n", - "- \"tropics. Foraging\" (when stemming is on)\n", - "\n", - "But not:\n", - "- \"tropical rain forest\"\n", - "\n", - "> **Constraint:** single-term prefix wildcards (`tropic*`) are not supported —\n", - "> the phrase must contain at least two tokens before the `*`.\n", - ">\n", - "> **Practical use:** power an autocomplete UI widget by passing the partial\n", - "> query string directly into this pattern as the user types." - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "step13-code", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "query_string body:(\"tropical fo\"*)\n", - "\n", - "Score 1.0000 [Black-and-white_owl] Black-and-white owl\n", - " The black-and-white owl (Strix nigrolineata) is a species of owl in the family\n", - " Strigidae. The black-and-white owl is a medium-sized owl with a round head and no\n", - " ear tufts. It is between 35 and 40 cm in length and weigh between 400 and 535 grams.\n", - " As for most owl species, females are usually bigger than males with an average\n", - " weight of 487 g and 418 g respectively. It has a striped black-and-white breast,\n", - " belly, and vent. With the exception of a black-and white striped collar, the\n", - " upperparts from the crown to the tail are a sooty black. The facial disc is mostly\n", - " sooty black, with white \"eyebrows\" that extend from the bill to the collar. The beak\n", - " is a yellow-orange colour, and the eyes are a reddish brown. Chicks are downy and\n", - " white. Juveniles have a whitish face, dark brown upper-parts and a white-barred\n", - " black underside. 
Formerly under the genus Ciccaba which includes many neotropical\n", - " species, the black-and-white owl is now classified under the genus Strix known as\n", - " the \"wood owls\", which all share the same round head and pitch black eyes. This\n", - " raptor was first reported in 1859 by Sclater. Its sister species, the black-banded\n", - " owl (Strix hulula), while being similar, is smaller and has a darker plumage. Plus,\n", - " it occupies a different range which includes the southern tropical forests of South\n", - " America. The black-and-white owl is mostly found in gallery forests and rainforest,\n", - " but is also found in wet deciduous and mangrove forests, usually at an altitude\n", - " between sea level and 2400 meters. Small ponds are also often visited by this\n", - " species when hunting. It usually nests in the foliage of large, tall trees such as\n", - " mahogany. This owl is not afraid of living near human habitations. Its range\n", - " extends from central Mexico south to the northwestern section of Peru and western\n", - " Colombia, a range it partially shares with another related species: the mottled owl\n", - " (Strix virgata). In total, it is found in 12 countries: Belize, Colombia, Costa\n", - " Rica, Ecuador, El Salvador, Guatemala, Honduras, Mexico, Nicaragua, Panama, Peru,\n", - " and Venezuela. This bird of prey also stays faithful to its range all year long as\n", - " it is a non-migratory bird. This neotropical bird is a nocturnal hunter and since\n", - " most of its prey can fly, it forages mostly at the canopy level of its habitat. It\n", - " will first examine its surroundings by perching on an elevated branch. Then it will\n", - " make short, silent flights to catch its food. Primarily insectivorous, the black-\n", - " and-white owl prefers scarab beetles (Scarabaedidae) such as dung beetles and\n", - " sometimes prey upon orthopterans and cicadas (Cicadidae). 
Bats such as the Jamaican\n", - " fruit bat (Artibeus jamaicensis) and other small rodents also make up a large part\n", - " of its diet. Furthermore, it can occasionally feed on smaller birds like trushes and\n", - " tanagers as well as amphibians. It is also one of the only tropical owl species\n", - " reported to capture barn swallows since those are easy to catch when roosting on\n", - " nearby electric lines. Its call consists of a series of rapid, guttural, low calls,\n", - " followed by a short pause and a low, airy call and a faint, short hoot.\n", - " Occasionally, it is shortened to just the last two notes, leaving out the opening\n", - " series. Moreover, the female's call usually sounds louder than the male's and\n", - " individuals make fainter \"hoots\" near their nest. Just like their parents,\n", - " younglings can produce strident cries, but also communicate by clacking their beak.\n", - " When the breeding season begins in March, the monogamous pair forms after the male\n", - " successfully seduces the female with wing flaps and elaborated acrobatic flights.\n", - " Then, before the severe downpours begin, the couple settles on an isolated tree to\n", - " protect their offspring from climbing predators and use epiphytes and flower\n", - " arrangements (e.g., orchids) as their nest. While the female incubates the clutch of\n", - " one or two eggs, the male goes foraging for the pair and fiercely defends the nest,\n", - " even from nearby humans. Sometimes, the female will tag along with her mate when she\n", - " is not coveting. The eggs are dull, whitish and weight about 33.8 g (usually 6% of\n", - " the female's body mass) with an average length and width of 46.4 and 38.4 mm (1.83\n", - " and 1.51 in) respectively. The black-and-white owl is one of the only owls to lay a\n", - " single egg which relates to the fact that clutch size lowers as a species lives\n", - " closer to the Equator, thus explaining its low reproductive success. 
After an\n", - " incubation period of at least 30 days, the chicks hatch in April. They harbour white\n", - " down feathers, pink feet and beak and weight around 28 g after 2 days. They first\n", - " open their eyes at 14 days old as the black bars on their wings start to develop.\n", - " Chicks also have a low chance of survival as they can be preyed upon by tayras,\n", - " ocelots, coatis, falcons and hawks. The black-and-white owl is classified as \"Least\n", - " Concern\" on IUCN's Red List even if its populations are decreasing. Mexico's\n", - " population in particular observed a decline of 50% in the last century which has\n", - " made the species a big concern for the Partners in Flight association. This bird of\n", - " prey's main threat is the loss of its wetland and forest habitats which are\n", - " progressively converted into agricultural lands. Human proximity also causes their\n", - " populations to decrease.\n", - "\n", - "Score 1.0000 [Bald_eagle] Bald eagle\n", - " Falco ossifragus Shaw, 1809 (nec Linnaeus) The bald eagle (Haliaeetus\n", - " leucocephalus) is a bird of prey found in North America. A sea eagle, it has two\n", - " known subspecies and forms a species pair with the white-tailed eagle (Haliaeetus\n", - " albicilla), which occupies the same niche as the bald eagle in the Palearctic. Its\n", - " range includes most of Canada and Alaska, all of the contiguous United States, and\n", - " northern Mexico. It is found near large bodies of open water with an abundant food\n", - " supply and old-growth trees for nesting. The bald eagle is an opportunistic feeder\n", - " which subsists mainly on fish, which it swoops down upon and snatches from the water\n", - " with its talons. It builds the largest nest of any North American bird and the\n", - " largest tree nests ever recorded for any animal species, up to 4 m (13 ft) deep, 2.5\n", - " m (8.2 ft) wide, and 1 metric ton (1.1 short tons) in weight. 
Sexual maturity is\n", - " attained at the age of four to five years. Bald eagles are not bald; the name\n", - " derives from an older meaning of the word, \"white-headed\". The adult is mainly brown\n", - " with a white head and tail. The sexes are identical in plumage, but females are\n", - " about 25 percent larger than males. The yellow beak is large and hooked. The plumage\n", - " of the immature is brown. The bald eagle is the national symbol of the United\n", - " States and appears on its seal. In the late 20th century it was on the brink of\n", - " extirpation in the contiguous United States, but measures such as banning the\n", - " practice of hunting bald eagles and banning the use of the harmful pesticide DDT\n", - " slowed the decline of their population. Populations have since recovered, and the\n", - " species' status was upgraded from \"endangered\" to \"threatened\" in 1995 and removed\n", - " from the list altogether in 2007. In 2024, the bald eagle was officially made the\n", - " national bird of the United States. The bald eagle is placed in the genus\n", - " Haliaeetus (sea eagles), and gets both its common and specific scientific names from\n", - " the distinctive appearance of the adult's head. Bald in the English name is from an\n", - " older usage meaning \"having white on the face or head\" rather than \"hairless\",\n", - " referring to the white head feathers contrasting with the darker body. The genus\n", - " name is Neo-Latin: Haliaeetus (from the Ancient Greek: ἁλιάετος, romanized:\n", - " haliaetos, lit. 'sea eagle'), and the specific name, leucocephalus, is Latinized\n", - " (Ancient Greek: λευκός, romanized: leukos, lit. 'white') and (κεφαλή, kephalḗ,\n", - " 'head'). The bald eagle was one of the many species originally described by Carl\n", - " Linnaeus in his 18th-century work Systema Naturae, under the name Falco\n", - " leucocephalus. The bald eagle forms a species pair with the white-tailed eagle of\n", - " Eurasia. 
This species pair consists of a white-headed and a tan-headed species of\n", - " roughly equal size; the white-tailed eagle also has overall somewhat paler brown\n", - " body plumage. The two species fill the same ecological niche in their respective\n", - " ranges. The pair diverged from other sea eagles at the beginning of the Early\n", - " Miocene (c. 10 Ma BP) at the latest, but possibly as early as the Early/Middle\n", - " Oligocene, 28 Ma BP, if the most ancient fossil record is correctly assigned to this\n", - " genus. There are two recognized subspecies of bald eagle: The plumage of an adult\n", - " bald eagle is evenly dark brown with a white head and tail. The tail is moderately\n", - " long and slightly wedge-shaped. Males and females are identical in plumage\n", - " coloration, but sexual dimorphism is evident in the species, in that females are 25%\n", - " larger than males. The beak, feet and irises are bright yellow. The legs are\n", - " feather-free, and the toes are short and powerful with large talons. The highly\n", - " developed talon of the hind toe is used to pierce the vital areas of prey while it\n", - " is held immobile by the front toes. The beak is large and hooked, with a yellow\n", - " cere. The adult bald eagle is unmistakable in its native range. The closely related\n", - " African fish eagle (Haliaeetus vocifer) (from far outside the bald eagle's range)\n", - " also has a brown body (albeit of somewhat more rufous hue), white head and tail, but\n", - " differs from the bald eagle in having a white chest and black tip to the bill. 
The\n", - " plumage of the immature is a dark brown overlaid with messy white streaking until\n", - " the fifth (rarely fourth, very rarely third) year, when it reaches sexual maturity.\n", - " Immature bald eagles are distinguishable from the golden eagle (Aquila chrysaetos),\n", - " the only other very large, non-vulturine raptorial bird in North America, in that\n", - " the former has a larger, more protruding head with a larger beak, straighter edged\n", - " wings which are held flat (not slightly raised) and with a stiffer wing beat and\n", - " feathers which do not completely cover the legs. When seen well, the golden eagle is\n", - " distinctive in plumage with a more solid warm brown color than an immature bald\n", - " eagle, with a reddish-golden patch to its nape and (in immature birds) a highly\n", - " contrasting set of white squares on the wing. The bald eagle has sometimes been\n", - " considered the largest true raptor (accipitrid) in North America. The only larger\n", - " species of raptor-like bird is the California condor (Gymnogyps californianus), a\n", - " New World vulture which today is not generally considered a taxonomic ally of true\n", - " accipitrids. However, the golden eagle, averaging 4.18 kg (9.2 lb) and 63 cm (25 in)\n", - " in wing chord length in its American race (Aquila chrysaetos canadensis), is merely\n", - " 455 g (1.003 lb) lighter in mean body mass and exceeds the bald eagle in mean wing\n", - " chord length by around 3 cm (1.2 in). Additionally, the bald eagle's close cousins,\n", - " the relatively longer-winged but shorter-tailed white-tailed eagle and the overall\n", - " larger Steller's sea eagle (Haliaeetus pelagicus), may, rarely, wander to coastal\n", - " Alaska from Asia. The bald eagle has a body length of 70–102 cm (28–40 in). Typical\n", - " wingspan is between 1.8 and 2.3 m (5 ft 11 in and 7 ft 7 in) and mass is normally\n", - " between 3 and 6.3 kg (6.6 and 13.9 lb). 
Females are about 25% larger than males,\n", - " averaging as much as 5.6 kg (12 lb), and against the males' average weight of 4.1 kg\n", - " (9.0 lb). The size of the bird varies by location and generally corresponds with\n", - " Bergmann's rule: the species increases in size further away from the equator and the\n", - " tropics. For example, eagles from South Carolina average 3.27 kg (7.2 lb) in mass\n", - " and 1.88 m (6 ft 2 in) in wingspan, smaller than their northern counterparts. One\n", - " field guide in Florida listed similarly small sizes for bald eagles there, at about\n", - " 4.13 kg (9.1 lb). Of intermediate size, 117 migrant bald eagles in Glacier National\n", - " Park were found to average 4.22 kg (9.3 lb) but this was mostly (possibly post-\n", - " dispersal) juvenile eagles, with 6 adults here averaging 4.3 kg (9.5 lb). Wintering\n", - " eagles in Arizona (winter weights are usually the highest of the year since, like\n", - " many raptors, they spend the highest percentage of time foraging during winter) were\n", - " found to average 4.74 kg (10.4 lb). The largest eagles are from Alaska, where large\n", - " females may weigh more than 7 kg (15 lb) and span 2.44 m (8 ft 0 in) across the\n", - " wings. A survey of adult weights in Alaska showed that females there weighed on\n", - " average 5.35 kg (11.8 lb), respectively, and males weighed 4.23 kg (9.3 lb) against\n", - " immatures which averaged 5.09 kg (11.2 lb) and 4.05 kg (8.9 lb) in the two sexes. An\n", - " Alaskan adult female eagle that was considered outsized weighed some 7.4 kg (16 lb).\n", - " R.S. Palmer listed a record from 1876 in Wyoming County, New York of an enormous\n", - " adult bald eagle that was shot and reportedly scaled 8.2 kg (18 lb). Among standard\n", - " linear measurements, the wing chord is 51.5–69 cm (20.3–27.2 in), the tail is 23–37\n", - " cm (9.1–14.6 in) long, and the tarsus is 8 to 11 cm (3.1 to 4.3 in). 
The culmen\n", - " reportedly ranges from 3 to 7.5 cm (1.2 to 3.0 in), while the measurement from the\n", - " gape to the tip of the bill is 7–9 cm (2.8–3.5 in). The bill size is unusually\n", - " variable: Alaskan eagles can have up to twice the bill length of birds from the\n", - " southern United States (Georgia, Louisiana, Florida), with means including both\n", - " sexes of 6.83 cm (2.69 in) and 4.12 cm (1.62 in) in culmen length, respectively,\n", - " from these two areas. The call consists of weak staccato, chirping whistles, kleek\n", - " kik ik ik ik, somewhat similar in cadence to a gull's call. The calls of young birds\n", - " tend to be more harsh and shrill than those of adults. The bald eagle's natural\n", - " range covers most of North America, including most of Canada, all of the continental\n", - " United States, and northern Mexico. It is the only sea eagle endemic to North\n", - " America. Occupying varied habitats from the bayous of Louisiana to the Sonoran\n", - " Desert and the eastern deciduous forests of Quebec and New England, northern birds\n", - " are migratory, while southern birds are resident, remaining on their breeding\n", - " territory all year. At minimum population, in the 1950s, it was largely restricted\n", - " to Alaska, the Aleutian Islands, northern and eastern Canada, and Florida. From 1966\n", - " to 2015 bald eagle numbers increased substantially throughout its winter and\n", - " breeding ranges, and as of 2018 the species nests in every continental state and\n", - " province in the United States and Canada. The majority of bald eagles in Canada are\n", - " found along the British Columbia coast while large populations are found in the\n", - " forests of Alberta, Saskatchewan, Manitoba and Ontario. Bald eagles also congregate\n", - " in certain locations in winter. From November until February, one to two thousand\n", - " birds winter in Squamish, British Columbia, about halfway between Vancouver and\n", - " Whistler. 
In March 2024, bald eagles were found nesting in Toronto for the first\n", - " time. The birds primarily gather along the Squamish and Cheakamus Rivers, attracted\n", - " by the salmon spawning in the area. Similar congregations of wintering bald eagles\n", - " at open lakes and rivers, wherein fish are readily available for hunting or\n", - " scavenging, are observed in the northern United States. It has occurred as a\n", - " vagrant twice in Ireland; a juvenile was shot illegally in Fermanagh on January 11,\n", - " 1973 (misidentified at first as a white-tailed eagle), and an exhausted juvenile was\n", - " captured near Castleisland, County Kerry on November 15, 1987. There is also a\n", - " record of it from Llyn Coron, Anglesey, in the United Kingdom, from October 17,\n", - " 1978; the provenance of this individual eagle has remained in dispute. The bald\n", - " eagle occurs during its breeding season in virtually any kind of American wetland\n", - " habitat such as seacoasts, rivers, large lakes or marshes or other large bodies of\n", - " open water with an abundance of fish. Studies have shown a preference for bodies of\n", - " water with a circumference greater than 11 km (7 mi), and lakes with an area greater\n", - " than 10 km2 (4 sq mi) are optimal for breeding bald eagles. The bald eagle\n", - " typically requires old-growth and mature stands of coniferous or hardwood trees for\n", - " perching, roosting, and nesting. Tree species reportedly is less important to the\n", - " eagle pair than the tree's height, composition and location. Perhaps of paramount\n", - " importance for this species is an abundance of comparatively large trees surrounding\n", - " the body of water. Selected trees must have good visibility, be over 20 m (66 ft)\n", - " tall, an open structure, and proximity to prey. If nesting trees are in standing\n", - " water such as in a mangrove swamp, the nest can be located fairly low, at as low as\n", - " 6 m (20 ft) above the ground. 
In a more typical tree standing on dry ground, nests\n", - " may be located from 16 to 38 m (52 to 125 ft) in height. In Chesapeake Bay, nesting\n", - " trees averaged 82 cm (32 in) in diameter and 28 m (92 ft) in total height, while in\n", - " Florida, the average nesting tree stands 23 m (75 ft) high and is 23 cm (9.1 in) in\n", - " diameter. Trees used for nesting in the Greater Yellowstone area average 27 m (89\n", - " ft) high. Trees or forest used for nesting should have a canopy cover of no more\n", - " than 60%, and no less than 20%, and be in close proximity to water. Most nests have\n", - " been found within 200 m (660 ft) of open water. The greatest distance from open\n", - " water recorded for a bald eagle nest was over 3 km (1.9 mi), in Florida. Bald eagle\n", - " nests are often very large in order to compensate for size of the birds. The largest\n", - " recorded nest was found in Florida in 1963, and was measured at 2.9 m (9.5 ft) wide\n", - " and 6.1 m (20 ft) deep. In Florida, nesting habitats often consist of mangrove\n", - " swamps, the shorelines of lakes and rivers, pinelands, seasonally flooded flatwoods,\n", - " hardwood swamps, and open prairies and pastureland with scattered tall trees.\n", - " Favored nesting trees in Florida are slash pines (Pinus elliottii), longleaf pines\n", - " (P. palustris), loblolly pines (P. taeda) and cypress trees, but for the southern\n", - " coastal areas where mangroves are usually used. In Wyoming, groves of mature\n", - " cottonwoods or tall pines found along streams and rivers are typical bald eagle\n", - " nesting habitats. Wyoming eagles may inhabit habitat types ranging from large, old-\n", - " growth stands of ponderosa pines (Pinus ponderosa) to narrow strips of riparian\n", - " trees surrounded by rangeland. In Southeast Alaska, Sitka spruce (Picea sitchensis)\n", - " provided 78% of the nesting trees used by eagles, followed by hemlocks (Tsuga) at\n", - " 20%. 
Increasingly, eagles nest in human-made reservoirs stocked with fish. The bald\n", - " eagle is usually quite sensitive to human activity while nesting, and is found most\n", - " commonly in areas with minimal human disturbance. It chooses sites more than 1.2 km\n", - " (0.75 mi) from low-density human disturbance and more than 1.8 km (1.1 mi) from\n", - " medium- to high-density human disturbance. However, bald eagles will occasionally\n", - " nest in large estuaries or secluded groves within major cities, such as Hardtack\n", - " Island on the Willamette River in Portland, Oregon or John Heinz National Wildlife\n", - " Refuge at Tinicum in Philadelphia, Pennsylvania, which are surrounded by a great\n", - " quantity of human activity. Even more contrary to the usual sensitivity to\n", - " disturbance, a family of bald eagles moved to the Harlem neighborhood in New York\n", - " City in 2010. While wintering, bald eagles tend to be less habitat and disturbance\n", - " sensitive. They will commonly congregate at spots with plentiful perches and waters\n", - " with plentiful prey and (in northern climes) partially unfrozen waters. Alternately,\n", - " non-breeding or wintering bald eagles, particularly in areas with a lack of human\n", - " disturbance, spend their time in various upland, terrestrial habitats sometimes\n", - " quite far away from waterways. In the northern half of North America (especially the\n", - " interior portion), this terrestrial inhabitance by bald eagles tends to be\n", - " especially prevalent because unfrozen water may not be accessible. Upland wintering\n", - " habitats often consist of open habitats with concentrations of medium-sized mammals,\n", - " such as prairies, meadows or tundra, or open forests with regular carrion access.\n", - " The bald eagle is a powerful flier, and soars on thermal convection currents. 
It\n", - " reaches speeds of 56–70 km/h (35–43 mph) when gliding and flapping, and about 48\n", - " km/h (30 mph) while carrying fish. Its dive speed is between 120–160 km/h (75–99\n", - " mph), though it seldom dives vertically. Regarding their flying abilities, despite\n", - " being morphologically less well adapted to faster flight than golden eagles\n", - " (especially during dives), the bald eagle is considered surprisingly maneuverable in\n", - " flight. Bald eagles have also been recorded catching up to and then swooping under\n", - " geese in flight, turning over and thrusting their talons into the other bird's\n", - " breast. It is partially migratory, depending on location. If its territory has\n", - " access to open water, it remains there year-round, but if the body of water freezes\n", - " during the winter, making it impossible to obtain food, it migrates to the south or\n", - " to the coast. A number of populations are subject to post-breeding dispersal, mainly\n", - " in juveniles; Florida eagles, for example, will disperse northwards in the summer.\n", - " The bald eagle selects migration routes which take advantage of thermals, updrafts,\n", - " and food resources. During migration, it may ascend in a thermal and then glide\n", - " down, or may ascend in updrafts created by the wind against a cliff or other\n", - " terrain. Migration generally takes place during the daytime, usually between the\n", - " local hours of 8:00 a.m. and 6:00 p.m., when thermals are produced by the sun. The\n", - " bald eagle is an opportunistic carnivore with the capacity to consume a great\n", - " variety of prey. Fish often comprise most of the eagle's diet throughout their\n", - " range. In 20 food habit studies across the species' range, fish comprised 56% of the\n", - " diet of nesting eagles, birds 28%, mammals 14% and other prey 2%. 
More than 400\n", - " species are known to be included in the bald eagle's prey spectrum, far more than\n", - " its ecological equivalent in the Old World, the white-tailed eagle, is known to\n", - " take. Despite its considerably lower population, the bald eagle may come in second\n", - " amongst all North American accipitrids, slightly behind only the red-tailed hawk, in\n", - " number of prey species recorded. To hunt fish, the eagle swoops down over the water\n", - " and snatches the fish out of the water with its talons. They eat by holding the fish\n", - " in one claw and tearing the flesh with the other. Eagles have structures on their\n", - " toes called spicules that allow them to grasp fish. Ospreys also have this\n", - " adaptation. Bird prey may occasionally be attacked in flight, with prey up to the\n", - " size of Canada geese attacked and killed in mid-air. It has been estimated that the\n", - " bald eagle's gripping power (pounds by square inch) is ten times greater than that\n", - " of a human. Bald eagles can fly with fish at least equal to their own weight, but if\n", - " the fish is too heavy to lift, the eagle may be dragged into the water. Bald eagles\n", - " can swim, but in some cases, they drag their catch ashore with their talons. Still,\n", - " some eagles drown or succumb to hypothermia. Many sources claim that bald eagles,\n", - " like all large eagles, cannot normally take flight carrying prey more than half of\n", - " their own weight unless aided by favorable wind conditions. On numerous occasions,\n", - " when large prey such as large fish including mature salmon or geese are attacked,\n", - " eagles have been seen to make contact and then drag the prey in a strenuously\n", - " labored, low flight over the water to a bank, where they then finish off and\n", - " dismember the prey. When food is abundant, an eagle can gorge itself by storing up\n", - " to 1 kg (2.2 lb) of food in a pouch in the throat called a crop. 
Gorging allows the\n", - " bird to fast for several days if food becomes unavailable. Occasionally, bald eagles\n", - " may hunt cooperatively when confronting prey, especially relatively large prey such\n", - " as jackrabbits or herons, with one bird distracting potential prey, while the other\n", - " comes behind it in order to ambush it. While hunting waterfowl, bald eagles\n", - " repeatedly fly at a target and cause it to dive repeatedly, hoping to exhaust the\n", - " victim so it can be caught (white-tailed eagles have been recorded hunting waterfowl\n", - " in the same way). When hunting concentrated prey, a successful catch often results\n", - " in the hunting eagle being pursued by other eagles and needing to find an isolated\n", - " perch for consumption if it is able to carry it away successfully. They obtain much\n", - " of their food as carrion or via a practice known as kleptoparasitism, by which they\n", - " steal prey away from other predators. Due to their dietary habits, bald eagles are\n", - " frequently viewed in a negative light by humans. Thanks to their superior foraging\n", - " ability and experience, adults are generally more likely to hunt live prey than\n", - " immature eagles, which often obtain their food from scavenging. They are not very\n", - " selective about the condition or origin, whether provided by humans, other animals,\n", - " auto accidents or natural causes, of a carcass's presence, but will avoid eating\n", - " carrion where disturbances from humans are a regular occurrence. They will scavenge\n", - " carcasses up to the size of whales, though carcasses of ungulates and large fish are\n", - " seemingly preferred. Congregated wintering waterfowl are frequently exploited for\n", - " carcasses to scavenge by immature eagles in harsh winter weather. 
Bald eagles also\n", - " may sometimes feed on material scavenged or stolen from campsites and picnics, as\n", - " well as garbage dumps (dump usage is habitual mainly in Alaska) and fish-processing\n", - " plants. In Southeast Alaska, fish comprise approximately 66% of the year-round diet\n", - " of bald eagles and 78% of the prey brought to the nest by the parents. Eagles living\n", - " in the Columbia River Estuary in Oregon were found to rely on fish for 90% of their\n", - " dietary intake. At least 100 species of fish have been recorded in the bald eagle's\n", - " diet. From observation in the Columbia River, 58% of the fish were caught alive by\n", - " the eagle, 24% were scavenged as carcasses and 18% were pirated away from other\n", - " animals. In the Pacific Northwest, spawning trout and salmon provide most of the\n", - " bald eagles' diet from late summer throughout fall. Though bald eagles occasionally\n", - " catch live salmon, they usually scavenge spawned salmon carcass. Southeast Alaskan\n", - " eagles largely prey on pink salmon (Oncorhynchus gorbuscha), coho salmon (O.\n", - " kisutch) and, more locally, sockeye salmon (O. nerka), with Chinook salmon (O.\n", - " tshawytscha). Due to the Chinook salmon's large size (12 to 18 kg (26 to 40 lb)\n", - " average adult size) probably being taken only as carrion and a single carcass can\n", - " attract several eagles. Also important in the estuaries and shallow coastlines of\n", - " southern Alaska are Pacific herring (Clupea pallasii), Pacific sand lance (Ammodytes\n", - " hexapterus) and eulachon (Thaleichthys pacificus). In Oregon's Columbia River\n", - " Estuary, the most significant prey species were largescale suckers (Catostomus\n", - " macrocheilus) (17.3% of the prey selected there), American shad (Alosa sapidissima;\n", - " 13%) and common carp (Cyprinus carpio; 10.8%). 
Eagles living in the Chesapeake Bay\n", - " in Maryland were found to subsist largely on American gizzard shad (Dorosoma\n", - " cepedianum), threadfin shad (Dorosoma petenense) and white bass (Morone chrysops).\n", - " Floridian eagles have been reported to prey on catfish, most prevalently the brown\n", - " bullhead (Ameiurus nebulosus) and any species in the genus Ictalurus as well as\n", - " mullet, trout, needlefish, and eels. Chain pickerels (Esox niger) and white suckers\n", - " (Catostomus commersonii) are frequently taken in interior Maine. Wintering eagles on\n", - " the Platte River in Nebraska preyed mainly on American gizzard shads and common\n", - " carp. Bald eagles are also known to eat the following fish species: rainbow trout\n", - " (Oncorhynchus mykiss), white catfish (Ameiurus catus), rock greenling (Hexagrammos\n", - " lagocephalus), Pacific cod (Gadus macrocephalus), Atka mackerel (Pleurogrammus\n", - " monopterygius), largemouth bass (Micropterus salmoides), northern pike (Esox\n", - " lucius), striped bass (Morone saxatilis), dogfish shark (Squalidae.sp) and Blue\n", - " walleye (Sander vitreus). Fish taken by bald eagles varies in size, but bald eagles\n", - " take larger fish than other piscivorous birds in North America, typically range from\n", - " 20 to 75 cm (7.9 to 29.5 in) and prefer 36 cm (14 in) fish. When experimenters\n", - " offered fish of different sizes in the breeding season around Lake Britton in\n", - " California, fish measuring 34 to 38 cm (13 to 15 in) were taken 71.8% of the time by\n", - " parent eagles while fish measuring 23 to 27.5 cm (9.1 to 10.8 in) were chosen only\n", - " 25% of the time. At nests around Lake Superior, the remains of fish (mostly suckers)\n", - " were found to average 35.4 cm (13.9 in) in total length. 
In the Columbia River\n", - " estuary, most preyed on by eagles were estimated to measure less than 30 cm (12 in),\n", - " but larger fish between 30 and 60 cm (12 and 24 in) or even exceeding 60 cm (24 in)\n", - " in length also taken especially during the non-breeding seasons. They can take fish\n", - " up to at least twice their own weight, such as large mature salmons, carps, or even\n", - " muskellunge (Esox masquinongy), by dragging its catch with talons and pull toward\n", - " ashore. Much larger marine fish such as Pacific halibut (Hippoglossus stenolepis)\n", - " and lemon sharks (Negaprion brevirostris) have been recorded among bald eagle prey\n", - " though probably are only taken as young, as small, newly mature fish, or as carrion.\n", - " Benthic fishes such as catfish are usually consumed after they die and float to the\n", - " surface, though while temporarily swimming in the open may be more vulnerable to\n", - " predation than most fish since their eyes focus downwards. Bald eagles also\n", - " regularly exploit water turbines which produce battered, stunned or dead fish easily\n", - " consumed. Predators who leave behind scraps of dead fish that they kill, such as\n", - " brown bears (Ursus arctos), gray wolves (Canis lupus) and red foxes (Vulpes vulpes),\n", - " may be habitually followed in order to scavenge the kills secondarily. Once North\n", - " Pacific salmon die off after spawning, usually local bald eagles eat salmon\n", - " carcasses almost exclusively. Eagles in Washington need to consume 489 g (1.078 lb)\n", - " of fish each day for survival, with adults generally consuming more than juveniles\n", - " and thus reducing potential energy deficiency and increasing survival during winter.\n", - " Behind fish, the next most significant prey base for bald eagles are other\n", - " waterbirds. 
The contribution of such birds to the eagle's diet is variable,\n", - " depending on the quantity and availability of fish near the water's surface.\n", - " Waterbirds can seasonally comprise from 7% to 80% of the prey selection for eagles\n", - " in certain localities. Overall, birds are the most diverse group in the bald eagle's\n", - " prey spectrum, with 200 prey species recorded. Bird species most preferred as prey\n", - " by eagles tend to be medium-sized, such as western grebes (Aechmophorus\n", - " occidentalis), mallards (Anas platyrhynchos), and American coots (Fulica americana)\n", - " as such prey is relatively easy for the much larger eagles to catch and fly with.\n", - " American herring gull (Larus smithsonianus) are the favored avian prey species for\n", - " eagles living around Lake Superior. Black ducks (Anas rubripes), common eiders\n", - " (Somateria mollissima), and double-crested cormorants (Phalacrocorax auritus) are\n", - " also frequently taken in coastal Maine and velvet scoter (Melanitta fusca) was\n", - " dominant prey in San Miguel Island. Due to easy accessibility and lack of\n", - " formidable nest defense against eagles by such species, bald eagles are capable of\n", - " preying on such seabirds at all ages, from eggs to mature adults, and they can\n", - " effectively cull large portions of a colony. Along some portions of the North\n", - " Pacific coastline, bald eagles which had historically preyed mainly kelp-dwelling\n", - " fish and supplementally sea otter (Enhydra lutris) pups are now preying mainly on\n", - " seabird colonies since both the fish (possibly due to overfishing) and otters (cause\n", - " unknown) have had steep population declines, causing concern for seabird\n", - " conservation. Because of this more extensive predation, some biologist has expressed\n", - " concern that murres are heading for a \"conservation collision\" due to heavy eagle\n", - " predation. 
Eagles have been confirmed to attack nocturnally active, burrow-nesting\n", - " seabird species such as storm petrels and shearwaters by digging out their burrows\n", - " and feeding on all animals they find inside. If a bald eagle flies close by,\n", - " waterbirds will often fly away en masse, though they may seemingly ignore a perched\n", - " eagle in other cases. when the birds fly away from a colony, this exposes their\n", - " unprotected eggs and nestlings to scavengers such as gulls. While they usually\n", - " target small to medium-sized seabirds, larger seabirds such as great black-backed\n", - " gulls (Larus marinus) and northern gannets (Morus bassanus) and brown pelicans\n", - " (Pelecanus occidentalis) of all ages can successfully be taken by bald eagles.\n", - " Similarly, large waterbirds are occasionally killed. Geese such as wintering emperor\n", - " geese (Chen canagica) and snow geese (C. caerulescens), which gather in large\n", - " groups, sometimes becoming regular prey. Smaller Ross's geese (Anser rossii) are\n", - " also taken, as well as large-sized Canada geese (Branta canadensis). Predation on\n", - " the largest subspecies (Branta canadensis maxima) has been reported. Other large\n", - " waterbird prey include common loons (Gavia immer) of all ages. Large wading birds\n", - " can also fall prey to bald eagles. For the great blue herons (Ardea herodias), bald\n", - " eagles are their only serious enemies of all ages. Slightly larger Sandhill cranes\n", - " (Grus canadensis) can be taken as well. While adult whooping cranes (Grus americana)\n", - " are too large and formidable, their chicks can fall prey to bald eagles. They even\n", - " occasionally prey on adult tundra swans (Cygnus columbianus). Young trumpeter swans\n", - " (Cygnus buccinator) are also taken, and an unsuccessful attack on an adult swan has\n", - " been photographed. Bald eagles have been occasionally recorded as killing other\n", - " raptors. 
In some cases, these may be attacks of competition or kleptoparasitism on\n", - " rival species but end with the consumption of the dead victims. Nine species of\n", - " other accipitrids and owls are known to have been preyed upon by bald eagles. Owl\n", - " prey species have ranged in size from western screech-owls (Megascops kennicotti) to\n", - " snowy owls (Bubo scandiacus). Larger diurnal raptors known to have fallen victim to\n", - " bald eagles have included red-tailed hawks (Buteo jamaicensis), peregrine falcons\n", - " (Falco peregrinus), northern goshawks (Accipiter gentilis), ospreys (Pandion\n", - " haliaetus) and black (Coragyps atratus) and turkey vultures (Cathartes aura).\n", - " Mammalian preys are generally less frequently taken than fish or avian prey.\n", - " However, in some regions, such as landlocked areas of North America, wintering bald\n", - " eagles may become habitual predators of medium-sized mammals that occur in colonies\n", - " or local concentrations, such as prairie dogs (Cynomys sp.) and jackrabbits (Lepus\n", - " sp.). Bald eagles in Seedskadee National Wildlife Refuge often hunt in pair to catch\n", - " cottontails, jackrabbits and prairie dogs. They can attack and prey on rabbits and\n", - " hares of nearly any size, from marsh rabbits (Sylvilagus palustris) to black and\n", - " white-tailed jackrabbits (Lepus californicus & L. townsendii), and Arctic hares\n", - " (Lepus arcticus). In San Luis Valley, white-tailed jackrabbits can be important\n", - " prey. Additionally, rodents such as montane voles (Microtus montanus), brown rats\n", - " (Rattus norvegicus), and various squirrels are taken as supplementary prey. Larger\n", - " rodents such as muskrats (Ondatra zibethicus), young or small adult nutrias\n", - " (Myocastor coypus) and groundhogs (Marmota monax) are also preyed upon. Even\n", - " American porcupines (Erethizon dorsatum) are reportedly attacked and killed. 
Where\n", - " available, seal colonies can provide a lot of food. On Protection Island,\n", - " Washington, they commonly feed on harbor seal (Phoca vitulina) afterbirths, still-\n", - " borns and sickly seal pups. Similarly, bald eagles in Alaska readily prey on sea\n", - " otter (Enhydra lutris) pups. Small to medium-sized terrestrial mammalian carnivores\n", - " can be taken infrequently. Mustelid including American martens (Martes pennanti),\n", - " American minks (Neogale vison), and larger fisher cats (Pekania pennanti) are known\n", - " to be hunted. Foxes are also taken, including Island foxes ( Urocyon littoralis ),\n", - " Arctic foxes (Vulpes lagopus), and grey foxes (Urocyon cinereoargenteus). Although\n", - " fox farmers claimed that bald eagle heavily prey on young and adult free-range\n", - " Arctic fox, the predation events are sporadic. In one instance, two bald Eagles fed\n", - " upon a red fox (Vulpes vulpes) that had tried to cross a frozen Delaware Lake. Other\n", - " medium-sized carnivorans such as striped skunks (Mephitis mephitis), American hog-\n", - " nosed skunks (Conepatus leuconotus), and common raccoons (Procyon lotor) are taken,\n", - " as well as domestic cats (Felis catus) and dogs (canis familiaris). Other wild\n", - " mammalian prey include fawns of deer such as white-tailed deer (Odocoileus\n", - " virginianus) and Sitka deer (Odocoileus hemionus sitkensis), which weigh around 3 kg\n", - " (6.6 lb) can be taken alive by bald eagles. In one instance, a bald eagle was\n", - " observed carrying 6.8 kg (15 lb) mule deer (Odocoileus hemionus) fawn. Additionally,\n", - " Virginia opossums (Didelphis virginiana) can be preyed upon. Still, predation events\n", - " are rare due to their nocturnal habits. 
Together with the golden eagle, bald eagles\n", - " are occasionally accused of preying on livestock, especially sheep (Ovis aries).\n", - " There are a handful of proven cases of lamb predation, some specimens weighing up to\n", - " 11 kg (24 lb), by bald eagles. Still, they are much less likely to attack a healthy\n", - " lamb than a golden eagle. Both species prefer native, wild prey and are unlikely to\n", - " cause any extensive detriment to human livelihoods. There is one case of a bald\n", - " eagle killing and feeding on an adult, pregnant ewe (then joined in eating the kill\n", - " by at least 3 other eagles), which, weighing on average over 60 kg (130 lb), is much\n", - " larger than any other known prey taken by this species. Supplemental prey is\n", - " readily taken given the opportunity. In some areas, reptiles may become regular\n", - " prey, especially in warm areas such as Florida where reptile diversity is high.\n", - " Turtles are perhaps the most regularly hunted type of reptile. In coastal New\n", - " Jersey, 14 of 20 studied eagle nests included remains of turtles. The main species\n", - " found were common musk turtles (Sternotherus odoratus), diamondback terrapin\n", - " (Malaclemys terrapin) and juvenile common snapping turtles (Chelydra serpentina). In\n", - " these New Jersey nests, mainly subadult and small adults were taken, ranging in\n", - " carapace length from 9.2 to 17.1 cm (3.6 to 6.7 in). Similarly, many turtles were\n", - " recorded in the diet in the Chesapeake Bay. In Texas, softshell turtles are the most\n", - " frequently taken prey, and a large number of Barbour's map turtles are taken in\n", - " Torreya State Park. Other reptilian and amphibian prey includes southern alligator\n", - " lizards (Elgaria multicarinata), snakes such as garter snakes and rattlesnakes, and\n", - " Greater siren (Siren lacertina). Invertebrates are occasionally taken. 
In Alaska,\n", - " eagles feed on sea urchins (Strongylocentrotus sp.), chitons, mussels, and crabs.\n", - " Other various mollusks such as land snails, abalones, bivalves, periwinkles, blue\n", - " mussels, squids, and starfishes are taken as well. When competing for food, eagles\n", - " will usually dominate other fish-eaters and scavengers, aggressively displacing\n", - " mammals such as coyotes (Canis latrans) and foxes, and birds such as corvids, gulls,\n", - " vultures and other raptors. Occasionally, coyotes, bobcats (Lynx rufus) and domestic\n", - " dogs (Canis familiaris) can displace eagles from carrion, usually less confident\n", - " immature birds, as has been recorded in Maine. Bald eagles are less active, bold\n", - " predators than golden eagles and get relatively more of their food as carrion and\n", - " from kleptoparasitism (although it is now generally thought that golden eagles eat\n", - " more carrion than was previously assumed). However, the two species are roughly\n", - " equal in size, aggressiveness and physical strength and so competitions can go\n", - " either way. Neither species is known to be dominant, and the outcome depends on the\n", - " size and disposition of the individual eagles involved. Wintering bald and golden\n", - " eagles in Utah both sometimes won conflicts, though in one recorded instance a\n", - " single bald eagle successfully displaced two consecutive golden eagles from a kill.\n", - " Though bald eagles face few natural threats, an unusual attacker comes in the form\n", - " of the common loon (G. immer), which is also taken by eagles as prey. 
While common\n", - " loons normally avoid conflict, they are highly territorial and will attack predators\n", - " and competitors by stabbing at them with their knife-like bill; as the range of the\n", - " bald eagle has increased following conservation efforts, these interactions have\n", - " been observed on several occasions, including a fatality of a bald eagle in Maine\n", - " that is presumed to have come about as a result of it attacking a nest, then having\n", - " a fatal puncture wound inflicted by one or both loon parents. The bald eagle is\n", - " thought to be much more numerous in North America than the golden eagle, with the\n", - " bald species estimated to number at least 150,000 individuals, about twice as many\n", - " golden eagles there are estimated to live in North America. Due to this, bald eagles\n", - " often outnumber golden eagles at attractive food sources. Despite the potential for\n", - " contention between these animals, in New Jersey during winter, a golden eagle and\n", - " numerous bald eagles were observed to hunt snow geese alongside each other without\n", - " conflict. Similarly, both eagle species have been recorded, via video-monitoring, to\n", - " feed on gut piles and carcasses of white-tailed deer (Odocoileus virginianus) in\n", - " remote forest clearings in the eastern Appalachian Mountains without apparent\n", - " conflict. Bald eagles are frequently mobbed by smaller raptors, due to their\n", - " infrequent but unpredictable tendency to hunt other birds of prey. Many bald eagles\n", - " are habitual kleptoparasites, especially in winters when fish are harder to come by.\n", - " They have been recorded stealing fish from other predators such as ospreys, herons\n", - " and even otters. They have also been recorded opportunistically pirating birds from\n", - " peregrine falcons (Falco peregrinus), prairie dogs from ferruginous hawks (Buteo\n", - " regalis) and even jackrabbits from golden eagles. 
When they approach scavengers such\n", - " as dogs, gulls or vultures at carrion sites, they often attack them in an attempt to\n", - " force them to disgorge their food. Healthy adult bald eagles are not preyed upon in\n", - " the wild and are thus considered apex predators. Bald eagles are sexually mature at\n", - " four or five years of age. When they are old enough to breed, they often return to\n", - " the area where they were born. Bald eagles have high mate fidelity and generally\n", - " mate for life. However, if one pair member dies or disappears, the survivor will\n", - " choose a new mate. A pair that has repeatedly failed in breeding attempts may split\n", - " and look for new mates. Bald eagle courtship involves elaborate, spectacular calls\n", - " and flight displays by the males. The flight includes swoops, chases, and\n", - " cartwheels, in which they fly high, lock talons, and free-fall, separating just\n", - " before hitting the ground. Usually, a territory defended by a mature pair will be 1\n", - " to 2 km (0.62 to 1.24 mi) of waterside habitat. Compared to most other raptors,\n", - " which mostly nest in April or May, bald eagles are early breeders: nest building or\n", - " reinforcing is often by mid-February, egg laying is often late February (sometimes\n", - " during deep snow in the North), and incubation is usually mid-March and early May.\n", - " Eggs hatch from mid-April to early May, and the young fledge from late June to early\n", - " July. The nest is the largest of any bird in North America; it is used repeatedly\n", - " over many years and with new material added each year may eventually be as large as\n", - " 4 m (13 ft) deep, 2.5 m (8.2 ft) across and weigh 1 metric ton (1.1 short tons). One\n", - " nest in Florida was found to be 6.1 m (20 ft) deep, 2.9 meters (9.5 ft) across, and\n", - " to weigh 3 short tons (2.7 metric tons). This nest is on record as the largest tree\n", - " nest ever recorded for any animal. 
Usually nests are used for under five years, as\n", - " they either collapse in storms or break the branches supporting them by their sheer\n", - " weight. However, one nest in the Midwest was occupied continuously for at least 34\n", - " years. The nest is built of branches, usually in large trees found near water. When\n", - " breeding where there are no trees, the bald eagle will nest on the ground, as has\n", - " been recorded largely in areas largely isolated from terrestrial predators, such as\n", - " Amchitka Island in Alaska. In Sonora, Mexico, eagles have been observed nesting on\n", - " top of hecho catcuses (Pachycereus pectin-aboriginum). Nests located on cliffs and\n", - " rock pinnacles have been reported historically in California, Kansas, Nevada, New\n", - " Mexico and Utah, but currently are only verified to occur only in Alaska and\n", - " Arizona. The eggs average about 73 mm (2.9 in) long, ranging from 58 to 85 mm (2.3\n", - " to 3.3 in), and have a breadth of 54 mm (2.1 in), ranging from 47 to 63 mm (1.9 to\n", - " 2.5 in). Eggs in Alaska averaged 130 g (4.6 oz) in mass, while in Saskatchewan they\n", - " averaged 114.4 g (4.04 oz). As with their ultimate body size, egg size tends to\n", - " increase with distance from the equator. Eagles produce between one and three eggs\n", - " per year, two being typical. Rarely, four eggs have been found in nests, but these\n", - " may be exceptional cases of polygyny. Eagles in captivity have been capable of\n", - " producing up to seven eggs. It is rare for all three chicks to successfully reach\n", - " the fledgling stage. The oldest chick often bears the advantage of a larger size and\n", - " louder voice, which tends to draw the parents' attention towards it. Occasionally,\n", - " as is recorded in many large raptorial birds, the oldest sibling sometimes attacks\n", - " and kills its younger sibling(s), especially early in the nesting period when their\n", - " sizes are most different. 
However, nearly half of the known bald eagles produce two\n", - " fledglings (more rarely three), unlike in some other \"eagle\" species such as some in\n", - " the genus Aquila, in which a second fledgling is typically observed in less than 20%\n", - " of nests, despite two eggs typically being laid. Both the male and female take turns\n", - " incubating the eggs, but the female does most of the sitting. The parent not\n", - " incubating will hunt for food or look for nesting material during this stage. For\n", - " the first two to three weeks of the nestling period, at least one adult is at the\n", - " nest almost 100% of the time. After five to six weeks, the attendance of parents\n", - " usually drops off considerably (with the parents often perching in trees nearby). A\n", - " young eaglet can gain up to 170 g (6.0 oz) a day, the fastest growth rate of any\n", - " North American bird. The young eaglets pick up and manipulate sticks, play tug of\n", - " war with each other, practice holding things in their talons, and stretch and flap\n", - " their wings. By eight weeks, the eaglets are strong enough to flap their wings, lift\n", - " their feet off the nest platform, and rise in the air. The young fledge at anywhere\n", - " from 8 to 14 weeks of age, though will remain close to the nest and be attended to\n", - " by their parents for a further 6 weeks. Juvenile eagles first start dispersing away\n", - " from their parents about 8 weeks after they fledge. Variability in departure date\n", - " related to effects of sex and hatching order on growth and development. For the next\n", - " four years, immature eagles wander widely in search of food until they attain adult\n", - " plumage and are eligible to reproduce. Male eagles have been observed killing and\n", - " cannibalizing their chicks. 
In 2024 at the National Conservation Training Center in\n", - " West Virginia, the NCTC's Eagle Cam recorded two bald eagle chicks being attacked\n", - " and devoured by their father as soon as the mother departed from the nest. The NCTC\n", - " noted in its statement on the incident that such behavior \"has been observed in\n", - " other nests and is not uncommon in birds of prey.\" On rare occasions, bald eagles\n", - " have been recorded to adopt other raptor fledglings into their nests, as seen in\n", - " 2017 by a pair of eagles in Shoal Harbor Migratory Bird Sanctuary near Sidney,\n", - " British Columbia. The pair of eagles in question are believed to have carried a\n", - " juvenile red-tailed hawk back to their nest, presumably as prey, whereupon the chick\n", - " was accepted into the family by both the parents and the eagles' three nestlings.\n", - " The hawk, nicknamed \"Spunky\" by biologists monitoring the nest, fledged\n", - " successfully. The average lifespan of bald eagles in the wild is around 20 years,\n", - " with the oldest confirmed one having been 38 years of age. In captivity, they often\n", - " live somewhat longer. In one instance, a captive individual in New York lived for\n", - " nearly 50 years. As with size, the average lifespan of an eagle population appears\n", - " to be influenced by its location and access to prey. As they are no longer heavily\n", - " persecuted, adult mortality is quite low. In one study of Florida eagles, adult bald\n", - " eagles reportedly had 100% annual survival rate. In Prince William Sound in Alaska,\n", - " adults had an annual survival rate of 88% even after the Exxon Valdez oil spill\n", - " adversely affected eagles in the area. 
Of 1,428 individuals from across the range\n", - " necropsied by National Wildlife Health Center from 1963 to 1984, 329 (23%) eagles\n", - " died from trauma, primarily impact with wires and vehicles; 309 (22%) died from\n", - " gunshot; 158 (11%) died from poisoning; 130 (9%) died from electrocution; 68 (5%)\n", - " died from trapping; 110 (8%) from emaciation; and 31 (2%) from disease; cause of\n", - " death was undetermined in 293 (20%) of cases. In this study, 68% of mortality was\n", - " human-caused. Today, eagle-shooting is believed to be considerably reduced due to\n", - " the species' protected status. A U.S. Fish and Wildlife Service study of 1,490 bald\n", - " eagle deaths from 1986 through 2017 in Michigan found that 532 (36%) died due to\n", - " being struck by cars while scavenging roadkill and 176 (12%) died due to lead\n", - " poisoning from ingesting fragments of lead ammo and fishing gear present in carrion,\n", - " with the proportion of both causes of death increasing significantly towards the end\n", - " of the study period. Most non-human-related mortality involves nestlings or eggs.\n", - " Around 50% of eagles survive their first year. However, in the Chesapeake Bay area,\n", - " 100% of 39 radio-tagged nestlings survived to their first year. Nestling or egg\n", - " fatalities may be due to nest collapses, starvation, sibling aggression or inclement\n", - " weather. Another significant cause of egg and nestling mortality is predation. Nest\n", - " predators include large gulls, corvids (including ravens, crows and magpies),\n", - " wolverines (Gulo gulo), fishers (Pekania pennanti), red-tailed hawks, owls, other\n", - " eagles, bobcats, American black bears (Ursus americanus) and raccoons. If food\n", - " access is low, parental attendance at the nest may be lower because both parents may\n", - " have to forage, thus resulting in less protection. 
Nestlings are usually exempt from\n", - " predation by terrestrial carnivores that are poor tree-climbers, but Arctic foxes\n", - " (Vulpes lagopus) occasionally snatched nestlings from ground nests on Amchitka\n", - " Island in Alaska before they were extirpated from the island. The bald eagle will\n", - " defend its nest fiercely from all comers and has even repelled attacks from bears,\n", - " having been recorded knocking a black bear out of a tree when the latter tried to\n", - " climb a tree holding nestlings. Once a common sight in much of the continent, the\n", - " bald eagle was severely affected in the mid-20th century by a variety of factors,\n", - " among them the thinning of egg shells attributed to use of the pesticide DDT. Bald\n", - " eagles, like many birds of prey, were especially affected by DDT due to\n", - " biomagnification. DDT itself was not lethal to the adult bird, but it interfered\n", - " with their calcium metabolism, making them either sterile or unable to lay healthy\n", - " eggs; many of their eggs were too brittle to withstand the weight of a brooding\n", - " adult, making it nearly impossible for them to hatch. It is estimated that in the\n", - " early 18th century the bald eagle population was 300,000–500,000, but by the 1950s\n", - " there were only 412 nesting pairs in the 48 contiguous states of the US. Other\n", - " factors in bald eagle population reductions were a widespread loss of suitable\n", - " habitat, as well as both legal and illegal shooting. In 1930 a New York City\n", - " ornithologist wrote that in the territory of Alaska in the previous 12 years\n", - " approximately 70,000 bald eagles had been shot. 
Many of the hunters killed the bald\n", - " eagles under the long-held beliefs that bald eagles grabbed young lambs and even\n", - " children with their talons, yet the birds were innocent of most of these alleged\n", - " acts of predation (lamb predation is rare, human predation is thought to be non-\n", - " existent). Illegal shooting was described as \"the leading cause of direct mortality\n", - " in both adult and immature bald eagles\" by the U.S. Fish and Wildlife Service in\n", - " 1978. Leading causes of death in bald eagles include lead pollution, poisoning,\n", - " collision with motor vehicles, and power-line electrocution. A study published in\n", - " 2022 in the journal Science found that more than half of adult eagles across 38 US\n", - " states suffered from lead poisoning. The primary cause is when eagles scavenge\n", - " carcasses of animals shot by hunters. These are often tainted with lead shotgun\n", - " pellets, rifle rounds, or fishing tackle. The species was first protected in the\n", - " U.S. and Canada by the 1918 Migratory Bird Treaty, later extended to all of North\n", - " America. The Bald and Golden Eagle Protection Act, approved by the U.S. Congress in\n", - " 1940, protected the bald eagle and the golden eagle, prohibiting commercial trapping\n", - " and killing of the birds as well as collecting their eggs. The bald eagle was\n", - " declared an endangered species in the U.S. in 1967, and amendments to the 1940 act\n", - " between 1962 and 1972 further restricted commercial uses and increased penalties for\n", - " violators. Perhaps most significant in the species' recovery, in 1972, DDT was\n", - " banned from usage in the United States due to the fact that it inhibited the\n", - " reproduction of many birds. DDT was completely banned in Canada in 1989, though its\n", - " use had been highly restricted since the late 1970s. With regulations in place and\n", - " DDT banned, the eagle population rebounded. 
The bald eagle can be found in growing\n", - " concentrations throughout the United States and Canada, particularly near large\n", - " bodies of water. In the early 1980s, the estimated total population was 100,000\n", - " individuals, with 110,000–115,000 by 1992; the U.S. state with the largest resident\n", - " population is Alaska, with about 40,000–50,000, with the next highest population the\n", - " Canadian province of British Columbia with 20,000–30,000 in 1992. Obtaining a\n", - " precise count of the bald eagle population is extremely difficult. The most recent\n", - " data submitted by individual states was in 2006, when 9789 breeding pairs were\n", - " reported. For some time, the stronghold breeding population of bald eagles in the\n", - " lower 48 states was in Florida, where over a thousand pairs have held on while\n", - " populations in other states were significantly reduced by DDT use. Today, the\n", - " contiguous state with the largest number of breeding pairs of eagles is Minnesota\n", - " with an estimated 1,312 pairs, surpassing Florida's most recent count of 1,166\n", - " pairs. 23, or nearly half, of the 48 contiguous states now have at least 100\n", - " breeding pairs of bald eagles. In Washington State, there were only 105 occupied\n", - " nests in 1980. That number increased by about 30 per year, so that by 2005 there\n", - " were 840 occupied nests. 2005 was the last year that the Washington Department of\n", - " Fish and Wildlife counted occupied nests. Further population increases in Washington\n", - " may be limited by the availability of late winter food, particularly salmon. The\n", - " bald eagle was officially removed from the U.S. federal government's list of\n", - " endangered species on July 12, 1995, by the U.S. Fish & Wildlife Service, when it\n", - " was reclassified from \"endangered\" to \"threatened\". 
On July 6, 1999, a proposal was\n", - " initiated \"To Remove the Bald Eagle in the Lower 48 States From the List of\n", - " Endangered and Threatened Wildlife\". It was de-listed on June 28, 2007. It has also\n", - " been assigned a risk level of least concern category on the IUCN Red List. In the\n", - " Exxon Valdez oil spill of 1989 an estimated 247 were killed in Prince William Sound,\n", - " though the local population returned to its pre-spill level by 1995. In some areas,\n", - " the increase in eagles has led to decreases in other bird populations and the eagles\n", - " may be considered a pest. In December 2016, the U.S. Fish and Wildlife Service\n", - " proposed extending the permits issued to wind generation companies to allow them to\n", - " kill up to 4,200 bald eagles per year without facing a penalty, four times the\n", - " previous number. The permits would last 30 years, six times the previous 5-year\n", - " term. Permits are required to keep bald eagles in captivity in the United States.\n", - " Permits are primarily issued to public educational institutions, and the eagles that\n", - " they show are permanently injured individuals that cannot be released to the wild.\n", - " The facilities where eagles are kept must be equipped with adequate caging, as well\n", - " as workers experienced in the handling and care of eagles. The bald eagle can be\n", - " long-lived in captivity if well cared for, but does not breed well even under the\n", - " best conditions. In Canada and in England a license is required to keep bald eagles\n", - " for falconry. Bald eagles cannot legally be kept for falconry in the United States,\n", - " but a license may be issued in some jurisdictions to allow use of such eagles in\n", - " birds-of-prey flight shows. 
The bald eagle is important in various Native American\n", - " cultures and, as the national symbol of the United States, is prominent in seals and\n", - " logos, coinage, postage stamps, and other items relating to the U.S. federal\n", - " government. The bald eagle is a sacred bird in some North American cultures, and\n", - " its feathers, like those of the golden eagle, are central to many religious and\n", - " spiritual customs among Native Americans. Eagles are considered spiritual messengers\n", - " between gods and humans by some cultures. Many pow wow dancers use the eagle claw as\n", - " part of their regalia as well. Eagle feathers are often used in traditional\n", - " ceremonies, particularly in the construction of regalia worn and as a part of fans,\n", - " bustles and head dresses. In the Navajo tradition an eagle feather is represented to\n", - " be a protector, along with the feather Navajo medicine men use the leg and wing\n", - " bones for ceremonial whistles. The Lakota, for instance, give an eagle feather as a\n", - " symbol of honor to person who achieves a task. In modern times, it may be given on\n", - " an event such as a graduation from college. The Pawnee consider eagles as symbols of\n", - " fertility because their nests are built high off the ground and because they\n", - " fiercely protect their young. The Choctaw consider the bald eagle, who has direct\n", - " contact with the upper world of the sun, as a symbol of peace. During the Sun\n", - " Dance, which is practiced by many Plains Indian tribes, the eagle is represented in\n", - " several ways. The eagle nest is represented by the fork of the lodge where the dance\n", - " is held. A whistle made from the wing bone of an eagle is used during the course of\n", - " the dance. Also during the dance, a medicine man may direct his fan, which is made\n", - " of eagle feathers, to people who seek to be healed. 
The medicine man touches the fan\n", - " to the center pole and then to the patient, in order to transmit power from the pole\n", - " to the patient. The fan is then held up toward the sky, so that the eagle may carry\n", - " the prayers for the sick to the Creator. Current eagle feather law stipulates that\n", - " only individuals of certifiable Native American ancestry enrolled in a federally\n", - " recognized tribe are legally authorized to obtain or possess bald or golden eagle\n", - " feathers for religious or spiritual use. The constitutionality of these laws has\n", - " been questioned by Native American groups on the basis that it violates the First\n", - " Amendment by affecting ability to practice their religion freely. The National\n", - " Eagle Repository, a division of the FWS, exists as a means to receive, process, and\n", - " store bald and golden eagles which are found dead and to distribute the eagles,\n", - " their parts and feathers to federally recognized Native American tribes for use in\n", - " religious ceremonies. The bald eagle is the national symbol of the United States.\n", - " It was adopted as a national emblem in 1782, but not designated the \"national bird\"\n", - " until an act of Congress in December 2024. The founders of the United States were\n", - " fond of comparing their new republic with the Roman Republic, in which eagle imagery\n", - " (usually involving the golden eagle) was prominent. On June 20, 1782, the\n", - " Continental Congress adopted the design for the Great Seal of the United States,\n", - " depicting a bald eagle grasping 13 arrows and an olive branch with thirteen leaves\n", - " with its talons. The bald eagle appears on most official seals of the U.S.\n", - " government, including the presidential seal, the presidential flag, and in the logos\n", - " of many U.S. federal agencies. 
Between 1916 and 1945, the presidential flag (but not\n", - " the seal) showed an eagle facing to its left (the viewer's right), which gave rise\n", - " to the urban legend that the flag is changed to have the eagle face towards the\n", - " olive branch in peace, and towards the arrows in wartime. Contrary to popular\n", - " legend, there is no evidence that Benjamin Franklin ever publicly supported the wild\n", - " turkey (Meleagris gallopavo), rather than the bald eagle, as a symbol of the United\n", - " States. However, in a letter written to his daughter in 1784 from Paris, criticizing\n", - " the Society of the Cincinnati, he stated his personal distaste for the bald eagle's\n", - " behavior. In the letter Franklin states: For my own part. I wish the bald eagle had\n", - " not been chosen the representative of our country. He is a bird of bad moral\n", - " character. He does not get his living honestly ... besides he is a rank coward: The\n", - " little king bird not bigger than a sparrow attacks him boldly and drives him out of\n", - " the district. Franklin opposed the creation of the Society because he viewed it,\n", - " with its hereditary membership, as a noble order unwelcome in the newly independent\n", - " Republic, contrary to the ideals of Lucius Quinctius Cincinnatus, for whom the\n", - " Society was named. His reference to the two kinds of birds is interpreted as a\n", - " satirical comparison between the Society of the Cincinnati and Cincinnatus. Largely\n", - " because of its role as a symbol of the United States, but also because of its being\n", - " a large predator, the bald eagle has many representations in popular culture. In\n", - " film and television depictions the call of the red-tailed hawk, which is much louder\n", - " and more powerful, is often substituted for bald eagles.\n", - "\n" - ] - } - ], + "## 11. 
Boosting: influencing ranking by term importance\n", + "\n", + "`^N` multiplies a term’s BM25 score contribution by *N* **without** making it\n", + "required. Documents that lack the boosted term can still appear if they score\n", + "well on the other terms — boosting shapes the ranking, it doesn’t filter.\n", + "\n", + "```\n", + "body:(foraging^3 feeding diet)\n", + "```\n", + "\n", + "- `foraging^3` — three times the weight of an unboosted term\n", + "- `feeding`, `diet` — unboosted; contribute normally\n", + "\n", + "Phrases can be boosted too: `body:(\"aerial foraging\"^2 insects)` boosts the\n", + "exact adjacent phrase rather than a single token." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36", + "metadata": {}, + "outputs": [], + "source": [ + "# ── Boosting: foraging^3 outweighs feeding / diet ───────────────────────────\n", + "response_boosted = idx.documents.search(\n", + " namespace=NAMESPACE,\n", + " top_k=5,\n", + " score_by=[{\"type\": \"query_string\", \"query\": \"body:(foraging^3 feeding diet)\"}],\n", + " include_fields=[\"bird_name\", \"body\"],\n", + ")\n", + "\n", + "response_flat = idx.documents.search(\n", + " namespace=NAMESPACE,\n", + " top_k=5,\n", + " score_by=[{\"type\": \"query_string\", \"query\": \"body:(foraging feeding diet)\"}],\n", + " include_fields=[\"bird_name\", \"body\"],\n", + ")\n", + "\n", + "print(\"Boosted body:(foraging^3 feeding diet)\\n\")\n", + "show_results(response_boosted, \"body\", 4)\n", + "\n", + "print(\"Flat (no boost) body:(foraging feeding diet)\\n\")\n", + "show_results(response_flat, \"body\", 4)" + ] + }, + { + "cell_type": "markdown", + "id": "37", + "metadata": {}, + "source": [ + "## 12. 
Cross-field `query_string`: multiple fields in one clause\n", + "\n", + "A single `query_string` clause can reference **multiple fields** by combining\n", + "field-scoped sub-clauses with boolean operators.\n", + "\n", + "```\n", + "bird_name:(hawk) AND body:(hunting prey)\n", + "```\n", + "\n", + "The top-level `AND` requires **both** sub-clauses to match: a bird must have\n", + "“hawk” in its name **and** “hunting” or “prey” in its body.\n", + "\n", + "Contrast with the multi-clause `score_by` approach from step 4: that blends\n", + "scores across separate clauses (any clause can match). This enforces a\n", + "**cross-field boolean constraint** in a single expression." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38", + "metadata": {}, + "outputs": [], + "source": [ + "# ── Cross-field AND: hawk in name + hunting/prey in body ────────────────────\n", + "response_cross = idx.documents.search(\n", + " namespace=NAMESPACE,\n", + " top_k=5,\n", + " score_by=[\n", + " {\n", + " \"type\": \"query_string\",\n", + " \"query\": \"bird_name:(hawk) AND body:(hunting prey)\",\n", + " }\n", + " ],\n", + " include_fields=[\"bird_name\", \"body\"],\n", + ")\n", + "\n", + "print(\"query_string bird_name:(hawk) AND body:(hunting prey)\\n\")\n", + "show_results(response_cross, \"body\")" + ] + }, + { + "cell_type": "markdown", + "id": "39", + "metadata": {}, + "source": [ + "## 13. Combining everything: a production-grade query\n", + "\n", + "Each of the preceding steps introduced one concept. 
This query composes them\n", + "all into a single expression:\n", + "\n", + "```\n", + "bird_name:(hawk^2 OR eagle) AND\n", + "body:((\"dense vegetation\" OR \"forest canopy\") AND hunt -fish)\n", + "```\n", + "\n", + "Clause by clause:\n", + "\n", + "| Clause | Concept |\n", + "|---|---|\n", + "| `bird_name:(hawk^2 OR eagle)` | boost (step 11) + token OR on `bird_name` (step 8) |\n", + "| `AND` | cross-field boolean (step 12) |\n", + "| `\"dense vegetation\" OR \"forest canopy\"` | exact phrases targeting forest-interior hunters (step 8) |\n", + "| `AND hunt` | require term — AND operator (step 6) |\n", + "| `-fish` | exclude term — NOT operator (step 7) |\n", + "\n", + "The intent is **forest-interior hawks and eagles that actively hunt, excluding\n", + "piscivorous species**. Two exclusions work at different levels:\n", + "\n", + "- **Name-level:** using `eagle` instead of `falcon` keeps open-country falcons\n", + "  like the Aplomado out of the candidate set entirely.\n", + "- **Body-level:** `-fish` hard-filters the Black-collared hawk, whose diet\n", + "  is described as \"mainly composed of fish\".\n", + "\n", + "Expected top results: the Bicolored hawk (\"flying through dense vegetation to\n", + "ambush unsuspecting prey\") and the Black-and-white hawk-eagle (\"nests in the\n", + "forest canopy\").\n", + "\n", + "> **Stemming:** `body` has stemming on, so `hunt` also matches *hunting* and\n", + "> *hunted* — no extra syntax needed.\n", + ">\n", + "> **Operator precedence:** AND binds tighter than OR. Use explicit\n", + "> parentheses when mixing to avoid surprises."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [ + "# ── Composed query: forest hawks/eagles that hunt, no fish-eating specialists ─\n", + "COMPOSED = (\n", + " \"bird_name:(hawk^2 OR eagle) AND \"\n", + " 'body:((\"dense vegetation\" OR \"forest canopy\") AND hunt -fish)'\n", + ")\n", + "\n", + "response_composed = idx.documents.search(\n", + " namespace=NAMESPACE,\n", + " top_k=5,\n", + " score_by=[{\"type\": \"query_string\", \"query\": COMPOSED}],\n", + " include_fields=[\"bird_name\", \"body\"],\n", + ")\n", + "\n", + "print(f\"query_string {COMPOSED}\\n\")\n", + "show_results(response_composed, \"body\")" + ] + }, + { + "cell_type": "markdown", + "id": "41", + "metadata": {}, + "source": [ + "## 14. Regex: token-level pattern matching\n", + "\n", + "Lucene `/pattern/` regex syntax works inside `query_string` clauses. Wrap the\n", + "pattern in forward slashes inside the field clause:\n", + "\n", + "```\n", + "bird_name:/.*bird/\n", + "```\n", + "\n", + "**Key constraint:** the pattern matches the **entire indexed token**, not the\n", + "full field string. `bird_name:/.*bird/` asks — for each token in `bird_name`,\n", + "does it match `.*bird` end-to-end?\n", + "\n", + "This unlocks suffix matching that no other query type supports. A simple\n", + "token search for `bird_name:(bird)` only finds documents where \"bird\" is a\n", + "standalone token — no bird in this corpus has that. Regex finds every compound\n", + "word that ends in \"bird\": `hummingbird`, `mockingbird`, `puffbird`, etc.\n", + "\n", + "> **Stemming note:** `bird_name` has stemming **off**, so regex operates on\n", + "> the raw lowercased tokens — no morphological expansion." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42", + "metadata": {}, + "outputs": [], + "source": [ + "# ── Token search: \"bird\" as a standalone token ───────────────────────────────\n", + "# No bird in this corpus has \"bird\" as a separate token in its name —\n", + "# hummingbird, mockingbird, puffbird, etc. are all single compound tokens.\n", + "response_token = idx.documents.search(\n", + " namespace=NAMESPACE,\n", + " top_k=5,\n", + " score_by=[{\"type\": \"query_string\", \"query\": \"bird_name:(bird)\"}],\n", + " include_fields=[\"bird_name\"],\n", + ")\n", + "\n", + "print(\"token bird_name:(bird)\\n\")\n", + "show_results(response_token, \"bird_name\")\n", + "\n", + "# ── Regex suffix: every token ending in \"bird\" ────────────────────────────────\n", + "# The pattern /.*bird/ matches the full token end-to-end, so it catches any\n", + "# compound word whose final characters are \"bird\".\n", + "response_regex = idx.documents.search(\n", + " namespace=NAMESPACE,\n", + " top_k=5,\n", + " score_by=[{\"type\": \"query_string\", \"query\": \"bird_name:/.*bird/\"}],\n", + " include_fields=[\"bird_name\"],\n", + ")\n", + "\n", + "print(\"regex bird_name:/.*bird/\\n\")\n", + "show_results(response_regex, \"bird_name\")\n", + "\n", + "token_ids = {d._id for d in response_token.matches}\n", + "regex_ids = {d._id for d in response_regex.matches}\n", + "gained = regex_ids - token_ids\n", + "if gained:\n", + " print(\"Found by regex but not by token search (compound tokens ending in 'bird'):\")\n", + " for doc in response_regex.matches:\n", + " if doc._id in gained:\n", + " print(f\" [{doc._id}] {doc.get('bird_name')}\")" + ] + }, + { + "cell_type": "markdown", + "id": "43", + "metadata": {}, + "source": [ + "## 15. 
Phrase prefix: autocomplete\n", + "\n", + "Appending `*` after a quoted phrase treats the **last token as a prefix**.\n", + "All preceding tokens must match exactly and adjacently; only the final token\n", + "is expanded.\n", + "\n", + "`body:(\"tropical fo\"*)` matches:\n", + "- \"tropical forest\"\n", + "- \"tropics. Foraging\" (when stemming is on)\n", + "\n", + "But not:\n", + "- \"tropical rain forest\"\n", + "\n", + "> **Constraint:** single-term prefix wildcards (`tropic*`) are not supported —\n", + "> the phrase must contain at least two tokens before the `*`.\n", + ">\n", + "> **Practical use:** power an autocomplete UI widget by passing the partial\n", + "> query string directly into this pattern as the user types." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44", + "metadata": {}, + "outputs": [], "source": [ "# ── Phrase prefix: autocomplete ──────────────────────────────────────────────\n", "response_prefix = idx.documents.search(\n", @@ -3314,7 +1491,7 @@ }, { "cell_type": "markdown", - "id": "step14-header", + "id": "45", "metadata": {}, "source": [ "## 16. 
Dense vector ranking with a phrase-match filter\n", @@ -3343,8 +1520,8 @@ }, { "cell_type": "code", - "execution_count": 73, - "id": "step14-gemini-setup", + "execution_count": 33, + "id": "46", "metadata": {}, "outputs": [ { @@ -3357,28 +1534,29 @@ } ], "source": [ - "from google import genai\n", - "from google.genai import types\n", - "\n", - "gem = genai.Client(api_key=GOOGLE_API_KEY)\n", + "gem = genai.Client(api_key=GOOGLE_API_KEY)\n", "EMBED = types.EmbedContentConfig(output_dimensionality=768)\n", "\n", "VISUAL_QUERY = \"red bird with a prominent crest\"\n", "\n", - "query_vector = gem.models.embed_content(\n", - " model=\"gemini-embedding-2\",\n", - " contents=VISUAL_QUERY,\n", - " config=EMBED,\n", - ").embeddings[0].values\n", + "query_vector = (\n", + " gem.models.embed_content(\n", + " model=\"gemini-embedding-2\",\n", + " contents=VISUAL_QUERY,\n", + " config=EMBED,\n", + " )\n", + " .embeddings[0]\n", + " .values\n", + ")\n", "\n", "print(f'Query : \"{VISUAL_QUERY}\"')\n", - "print(f'Vector: {len(query_vector)}-dim first 4 values: {list(query_vector[:4])}')" + "print(f\"Vector: {len(query_vector)}-dim first 4 values: {list(query_vector[:4])}\")" ] }, { "cell_type": "code", - "execution_count": 74, - "id": "step14-match-phrase", + "execution_count": 34, + "id": "47", "metadata": {}, "outputs": [ { @@ -3504,11 +1682,13 @@ "response_phrase = idx.documents.search(\n", " namespace=NAMESPACE,\n", " top_k=5,\n", - " score_by=[{\n", - " \"type\": \"dense_vector\",\n", - " \"field\": \"image_embedding\",\n", - " \"values\": query_vector,\n", - " }],\n", + " score_by=[\n", + " {\n", + " \"type\": \"dense_vector\",\n", + " \"field\": \"image_embedding\",\n", + " \"values\": query_vector,\n", + " }\n", + " ],\n", " filter={\"body\": {\"$match_phrase\": \"prominent crest\"}},\n", " include_fields=[\"bird_name\", \"body\"],\n", ")\n", @@ -3519,8 +1699,8 @@ }, { "cell_type": "code", - "execution_count": 59, - "id": "step14-match-all", + "execution_count": 35, + 
"id": "48", "metadata": {}, "outputs": [ { @@ -4368,11 +2548,13 @@ "response_all = idx.documents.search(\n", " namespace=NAMESPACE,\n", " top_k=5,\n", - " score_by=[{\n", - " \"type\": \"dense_vector\",\n", - " \"field\": \"image_embedding\",\n", - " \"values\": query_vector,\n", - " }],\n", + " score_by=[\n", + " {\n", + " \"type\": \"dense_vector\",\n", + " \"field\": \"image_embedding\",\n", + " \"values\": query_vector,\n", + " }\n", + " ],\n", " filter={\"body\": {\"$match_all\": \"prominent crest\"}},\n", " include_fields=[\"bird_name\", \"body\"],\n", ")\n", @@ -4383,7 +2565,7 @@ "# $match_all is broader than $match_phrase — both tokens must appear but\n", "# need not be adjacent. Expect more results or a different ranking.\n", "phrase_ids = {d._id for d in response_phrase.matches}\n", - "all_ids = {d._id for d in response_all.matches}\n", + "all_ids = {d._id for d in response_all.matches}\n", "gained = all_ids - phrase_ids\n", "if gained:\n", " print(\"In $match_all but not $match_phrase (tokens present but not adjacent):\")\n", @@ -4394,8 +2576,8 @@ }, { "cell_type": "code", - "execution_count": 60, - "id": "step14-compound", + "execution_count": 36, + "id": "49", "metadata": {}, "outputs": [ { @@ -4521,15 +2703,17 @@ "response_compound = idx.documents.search(\n", " namespace=NAMESPACE,\n", " top_k=5,\n", - " score_by=[{\n", - " \"type\": \"dense_vector\",\n", - " \"field\": \"image_embedding\",\n", - " \"values\": query_vector,\n", - " }],\n", + " score_by=[\n", + " {\n", + " \"type\": \"dense_vector\",\n", + " \"field\": \"image_embedding\",\n", + " \"values\": query_vector,\n", + " }\n", + " ],\n", " filter={\n", " \"$and\": [\n", - " {\"body\": {\"$match_phrase\": \"woody woodpecker\"}},\n", - " {\"body\": {\"$match_any\": \"prominent crest\"}},\n", + " {\"body\": {\"$match_phrase\": \"woody woodpecker\"}},\n", + " {\"body\": {\"$match_any\": \"prominent crest\"}},\n", " ]\n", " },\n", " include_fields=[\"bird_name\", \"intro\", \"body\"],\n", @@ 
-4542,7 +2726,7 @@ }, { "cell_type": "markdown", - "id": "dcb3e7bd", + "id": "50", "metadata": {}, "source": [ "## 17. Cleanup\n", @@ -4554,7 +2738,7 @@ { "cell_type": "code", "execution_count": null, - "id": "337bc8f8", + "id": "51", "metadata": {}, "outputs": [], "source": [ @@ -4564,7 +2748,7 @@ }, { "cell_type": "markdown", - "id": "summary", + "id": "52", "metadata": {}, "source": [ "## Summary\n",