{
  "_meta": {
    "name": "Article 11 Intelligence Measure v0.4",
    "short": "A11-IM v0.4",
    "license": "CC0 1.0 Universal",
    "version": "0.4",
    "created": "2026-04-25",
    "supersedes": "0.3",
    "principle": "Objective ground truth. Mechanical grading. No AI scores another AI (Article 22).",
    "categories": ["math", "logic", "factual", "science_mcq", "code", "chain_reasoning"],
    "num_questions": 30,

    "_contract": {
      "answer_format": "boxed",
      "boxed_marker": "\\boxed{...}",
      "boxed_marker_description": "Final answer MUST appear inside \\boxed{} in the response. Models that fail to use \\boxed{} score 0 on that question regardless of whether the right answer appears elsewhere in the response.",
      "fallback": "none",
      "fallback_rationale": "Following the answer-format contract IS part of what we measure. A model that ignores formatting instructions under low-pressure conditions cannot be trusted to follow constitutional governance instructions under higher-stakes conditions. The strict-only grading is intentional. v0.2 used regex-search-anywhere grading and produced false positives where verbose models received credit for the right answer appearing inside chain-of-work even when their final answer was wrong.",
      "scoring": "1 point per correct boxed answer. Max 30. Per-category breakdown also reported.",
      "ties": "If a model produces multiple \\boxed{} expressions in one response, only the LAST one is graded (commitment to a final answer)."
    },

    "_status_taxonomy": {
      "CORRECT": "Model boxed an answer that matches ground truth.",
      "WRONG": "Model boxed an answer that does not match ground truth (real reasoning error).",
      "NO_BOX": "Model returned a normal response but did not use \\boxed{} (genuine format-contract failure).",
      "VENDOR_ERROR": "The model's vendor (e.g. Mistral, OpenAI, xAI) returned a rate-limit, overload, timeout, or auth error and the Cathedral Worker returned a graceful fallback message in the model's place. The model never got to answer. Does NOT count against capability or contract scores.",
      "ERROR": "Network or unexpected exception. Worker reachable but response malformed.",
      "rationale": "v0.3 conflated NO_BOX and VENDOR_ERROR — a Mistral 429 fallback scored the same as a model that genuinely ignored the format contract. v0.4 separates them so vendor infrastructure issues don't pollute the model-fitness score. New schema reports raw pct AND pct_of_valid (correct / (total - vendor_errors)) so readers can see both 'how the deployment performs end-to-end' and 'how the model performs when the vendor is reachable'."
    },

    "_v02_known_failures": {
      "case_1_q03_mathmodpow": "S9_COMPASS bolded **9** as final answer (wrong; truth=4). Mid-chain showed work that included '7^4' and computed '30 mod 13 = 4'. v0.2 grader regex matched the literal '4' in '7^4' and credited the answer. v0.3 strict-only grading would mark this WRONG.",
      "case_2_q06_logicordering": "S9_COMPASS bolded **Dana** as final answer (wrong; truth=Alice). Mid-chain mentioned 'Alice is older than Bob' and 'Alice is second-oldest' as part of working through the problem. v0.2 grader matched 'Alice' anywhere and credited the answer. v0.3 strict-only grading would mark this WRONG.",
      "lesson": "v0.2 rewarded verbosity over correctness. v0.3 measures whether the model can commit to a final answer in the contracted location."
    }
  },

  "questions": [
    {
      "id": "Q01_MATH_MULT",
      "category": "math",
      "prompt": "Compute (17 * 23) + (31 * 29) - 100. Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. No other text needed.",
      "ground_truth": "1190",
      "grader": "numeric_match",
      "notes": "391 + 899 - 100 = 1190"
    },
    {
      "id": "Q02_MATH_DIVISORS",
      "category": "math",
      "prompt": "How many positive integer divisors does 2024 have? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. No other text needed.",
      "ground_truth": "16",
      "grader": "numeric_match",
      "notes": "2024 = 2^3 * 11 * 23, divisor count = 4*2*2 = 16"
    },
    {
      "id": "Q03_MATH_MODPOW",
      "category": "math",
      "prompt": "What is the remainder when 7^10 is divided by 13? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. No other text needed.",
      "ground_truth": "4",
      "grader": "numeric_match",
      "notes": "7^2=49=10 mod 13; 7^4=100=9; 7^8=3; 7^10=3*10=30=4 mod 13"
    },
    {
      "id": "Q04_MATH_SMALLEST_N",
      "category": "math",
      "prompt": "What is the smallest positive integer n such that n^2 + n is divisible by 11? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. No other text needed.",
      "ground_truth": "10",
      "grader": "numeric_match",
      "notes": "n(n+1) div by 11, smallest n=10 gives 110"
    },
    {
      "id": "Q05_MATH_LOG",
      "category": "math",
      "prompt": "If log base 2 of x equals 5, what is x? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. No other text needed.",
      "ground_truth": "32",
      "grader": "numeric_match",
      "notes": "2^5 = 32"
    },
    {
      "id": "Q06_LOGIC_ORDERING",
      "category": "logic",
      "prompt": "Dana is older than Alice. Alice is older than Bob. Charlie is younger than Bob. Who is the second-oldest? Wrap your final answer in \\boxed{}, e.g. \\boxed{Name}. Use just the name.",
      "ground_truth": "Alice",
      "grader": "exact_token",
      "notes": "Order: Dana > Alice > Bob > Charlie. Second-oldest = Alice."
    },
    {
      "id": "Q07_LOGIC_SHEEP",
      "category": "logic",
      "prompt": "A farmer has 17 sheep. All but 9 die. How many sheep are left? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. No other text needed.",
      "ground_truth": "9",
      "grader": "numeric_match",
      "notes": "Gotcha: 'all but 9 die' means 9 survive."
    },
    {
      "id": "Q08_LOGIC_BAT_BALL",
      "category": "logic",
      "prompt": "A bat and ball cost $1.10 total. The bat costs $1.00 more than the ball. How many cents does the ball cost? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. No other text needed.",
      "ground_truth": "5",
      "grader": "numeric_match",
      "notes": "Classic CRT. Ball = 5c, bat = $1.05, total = $1.10."
    },
    {
      "id": "Q09_LOGIC_SYLLOGISM",
      "category": "logic",
      "prompt": "All glorps are blickets. Some blickets are zorps. Therefore, some glorps are zorps. Is this argument logically valid? Wrap your final answer in \\boxed{}, e.g. \\boxed{Yes} or \\boxed{No}.",
      "ground_truth": "No",
      "grader": "exact_token",
      "notes": "Invalid syllogism. Does not follow."
    },
    {
      "id": "Q10_LOGIC_OVERTAKE",
      "category": "logic",
      "prompt": "If you overtake the second-place runner in a race, what place are you in? Wrap your final answer in \\boxed{}, e.g. \\boxed{1st}. Use ordinal form.",
      "ground_truth": "2nd",
      "grader": "exact_token",
      "notes": "Gotcha: overtaking 2nd puts you in 2nd, not 1st."
    },
    {
      "id": "Q11_FACT_TUNGSTEN",
      "category": "factual",
      "prompt": "What is the chemical symbol for tungsten? Wrap your final answer in \\boxed{}, e.g. \\boxed{Au}.",
      "ground_truth": "W",
      "grader": "exact_token",
      "notes": "From wolfram. Symbol is W."
    },
    {
      "id": "Q12_FACT_BERLIN",
      "category": "factual",
      "prompt": "What year did the Berlin Wall fall? Wrap your final answer in \\boxed{}, e.g. \\boxed{1900}. Just the year.",
      "ground_truth": "1989",
      "grader": "numeric_match",
      "notes": "November 9, 1989."
    },
    {
      "id": "Q13_FACT_MONGOLIA",
      "category": "factual",
      "prompt": "What is the capital city of Mongolia? Wrap your final answer in \\boxed{}, e.g. \\boxed{City}.",
      "ground_truth": "Ulaanbaatar",
      "grader": "contains_word",
      "notes": "Also accepts 'Ulan Bator'."
    },
    {
      "id": "Q14_FACT_MARS_MOONS",
      "category": "factual",
      "prompt": "How many moons does Mars have? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "2",
      "grader": "numeric_match",
      "notes": "Phobos and Deimos."
    },
    {
      "id": "Q15_FACT_HOBBIT",
      "category": "factual",
      "prompt": "Who wrote 'The Hobbit'? Wrap your final answer in \\boxed{}, e.g. \\boxed{LastName}. Last name only.",
      "ground_truth": "Tolkien",
      "grader": "exact_token",
      "notes": "J.R.R. Tolkien, 1937."
    },
    {
      "id": "Q16_SCI_NOBLE_GAS",
      "category": "science_mcq",
      "prompt": "Which of these is a noble gas? A) Oxygen  B) Nitrogen  C) Argon  D) Chlorine. Wrap your final answer letter in \\boxed{}, e.g. \\boxed{A}.",
      "ground_truth": "C",
      "grader": "mcq_letter",
      "notes": "Argon (Ar) — Group 18."
    },
    {
      "id": "Q17_SCI_ATOMIC_NUM",
      "category": "science_mcq",
      "prompt": "What is the atomic number of carbon? A) 4  B) 5  C) 6  D) 7. Wrap your final answer letter in \\boxed{}, e.g. \\boxed{A}.",
      "ground_truth": "C",
      "grader": "mcq_letter",
      "notes": "Carbon = 6 protons."
    },
    {
      "id": "Q18_SCI_MOST_MOONS",
      "category": "science_mcq",
      "prompt": "Which planet has the most moons? A) Earth  B) Mars  C) Jupiter  D) Saturn. Wrap your final answer letter in \\boxed{}, e.g. \\boxed{A}.",
      "ground_truth": "D",
      "grader": "mcq_letter",
      "notes": "Saturn (146 confirmed as of 2024 IAU)."
    },
    {
      "id": "Q19_SCI_LIGHT_SPEED",
      "category": "science_mcq",
      "prompt": "What is the approximate speed of light in vacuum (m/s)? A) 3 x 10^6  B) 3 x 10^7  C) 3 x 10^8  D) 3 x 10^9. Wrap your final answer letter in \\boxed{}, e.g. \\boxed{A}.",
      "ground_truth": "C",
      "grader": "mcq_letter",
      "notes": "299,792,458 m/s ≈ 3 × 10^8."
    },
    {
      "id": "Q20_SCI_HALOGEN",
      "category": "science_mcq",
      "prompt": "Which element is a halogen? A) Sodium  B) Iron  C) Fluorine  D) Helium. Wrap your final answer letter in \\boxed{}, e.g. \\boxed{A}.",
      "ground_truth": "C",
      "grader": "mcq_letter",
      "notes": "Fluorine — Group 17."
    },
    {
      "id": "Q21_CODE_LEN",
      "category": "code",
      "prompt": "What does len('Hello, World!') return in Python? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "13",
      "grader": "numeric_match",
      "notes": "5 letters + comma + space + 5 letters + ! = 13 chars."
    },
    {
      "id": "Q22_CODE_SUM_EVEN",
      "category": "code",
      "prompt": "In Python, what is sum(x for x in range(1, 11) if x % 2 == 0)? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "30",
      "grader": "numeric_match",
      "notes": "2+4+6+8+10 = 30."
    },
    {
      "id": "Q23_CODE_RANGE",
      "category": "code",
      "prompt": "How many elements are in list(range(5, 20, 3))? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "5",
      "grader": "numeric_match",
      "notes": "[5,8,11,14,17] = 5 elements."
    },
    {
      "id": "Q24_CODE_POWER",
      "category": "code",
      "prompt": "What does 2**10 evaluate to in Python? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "1024",
      "grader": "numeric_match",
      "notes": "2 to the 10th = 1024."
    },
    {
      "id": "Q25_CODE_REVERSE",
      "category": "code",
      "prompt": "What is 'hello'[::-1] in Python? Wrap your final answer in \\boxed{}, e.g. \\boxed{word}. Just the resulting string.",
      "ground_truth": "olleh",
      "grader": "exact_token",
      "notes": "Reversed slice."
    },
    {
      "id": "Q26_CHAIN_RECTANGLE",
      "category": "chain_reasoning",
      "prompt": "A rectangle has perimeter 40 and length 12. What is its area? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "96",
      "grader": "numeric_match",
      "notes": "P=2L+2W → 40=24+2W → W=8. Area=12*8=96."
    },
    {
      "id": "Q27_CHAIN_TRAINS",
      "category": "chain_reasoning",
      "prompt": "Train A leaves city X heading east at 60 mph. Train B leaves city Y heading west at 90 mph. The cities are 300 miles apart. How many hours until they meet? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "2",
      "grader": "numeric_match",
      "notes": "Closing speed 150 mph, distance 300 mi, t=2 hr."
    },
    {
      "id": "Q28_CHAIN_PROBABILITY",
      "category": "chain_reasoning",
      "prompt": "A bag has 3 red and 5 blue marbles. You draw 2 without replacement. What is the probability both are red? Express as a simplified fraction. Wrap your final answer in \\boxed{}, e.g. \\boxed{1/2}. Just the fraction.",
      "ground_truth": "3/28",
      "grader": "numeric_match",
      "notes": "(3/8)*(2/7) = 6/56 = 3/28."
    },
    {
      "id": "Q29_CHAIN_WORKERS",
      "category": "chain_reasoning",
      "prompt": "If 5 workers can build 5 walls in 5 days, how many days do 100 workers need to build 100 walls? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "5",
      "grader": "numeric_match",
      "notes": "Rate = 1 wall per worker per 5 days. 100 workers, 100 walls, parallel = 5 days."
    },
    {
      "id": "Q30_CHAIN_TRIANGLE",
      "category": "chain_reasoning",
      "prompt": "A triangle has angles in the ratio 2:3:4. What is the measure of the largest angle in degrees? Wrap your final answer in \\boxed{}, e.g. \\boxed{42}. Just the number.",
      "ground_truth": "80",
      "grader": "numeric_match",
      "notes": "2x+3x+4x=180; 9x=180; x=20; largest=4*20=80 degrees."
    }
  ]
}
