.zenodo.json
CITATION.cff
LICENSE
LICENSE-docs.md
MANIFEST.in
PUBLIC_RELEASE_CHECKLIST.md
README.md
pyproject.toml
artifacts/__init__.py
artifacts/reproduction/20260305_230206/materialization_manifest.json
artifacts/reproduction/20260305_230206/reproduction_report.json
artifacts/reproduction/20260305_230206/reproduction_report.md
artifacts/reproduction/20260305_230206/findings/claim_consistency_check.json
artifacts/reproduction/20260305_230206/findings/claim_consistency_report.md
artifacts/reproduction/20260305_230206/findings/competitor_n20.json
artifacts/reproduction/20260305_230206/findings/model1_n50.json
artifacts/reproduction/20260305_230206/findings/model2_n50.json
artifacts/reproduction/20260305_230206/findings/model3_confirmation.json
artifacts/reproduction/20260305_230206/findings/model3_ma_yun_n50.json
artifacts/reproduction/20260305_230206/findings/model3_n50.json
artifacts/reproduction/20260305_230206/findings/submission.tex
artifacts/reproduction/20260305_230206/findings/warmup_generation_test.json
artifacts/reproduction/20260305_230206/warmup/artifacts/warmup_diff/diff_heatmap.csv
artifacts/reproduction/20260305_230206/warmup/artifacts/warmup_diff/diff_summary.json
artifacts/reproduction/20260305_230206/warmup/artifacts/warmup_diff/hypothesis_ledger.md
artifacts/reproduction/20260305_230206/warmup/artifacts/warmup_diff/top_changed_tokens.csv
artifacts/reproduction/20260305_230206/warmup/artifacts/warmup_diff/warmup_diff_report.md
artifacts/reproduction/20260305_230206/warmup/data/results/warmup/memory/ARCHIVE_NOTE.md
artifacts/reproduction/20260305_230206/warmup/data/results/warmup/memory/memory_extraction_local.jsonl
artifacts/reproduction/20260305_230206/warmup/data/results/warmup/memory/memory_results.json
artifacts/reproduction/20260305_230206/warmup/data/results/warmup/motifs/motifs.json
artifacts/reproduction/20260305_230206/warmup/data/results/warmup/triggers/trigger_candidates.json
artifacts/reproduction/20260305_230206/warmup/data/results/warmup/triggers/verified_triggers.json
artifacts/submissions/README.md
artifacts/submissions/SCOREBOARD.json
artifacts/submissions/SCOREBOARD.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/aurora_context_seeded_v0/aurora_context_hybrid_reference_submission_v0/task_check.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/PACKET_INDEX.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/STATS_APPENDIX.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/SUBMISSION_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/SUBMISSION_REPORT.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/TASK_CHECK.md
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/benchmark_bundle_check.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/benchmark_bundle_v0.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/primary_report_check.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/raw_evidence_check.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/raw_evidence_packet_v0.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/run_manifest.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/submission_check.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/submission_stats.json
artifacts/submissions/aurora_context_seeded_v0/simulated_external_aurora_scripted_v0/task_check.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/coastal_retrieval_qwen2_5_7b_transfer_v0/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0/task_check.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/coastal_retrieval_seeded_v0/coastal_retrieval_hybrid_reference_submission_v0/task_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/MODEL2_TOP5_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/MODEL3_MA_YUN_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/MODEL3_TOP5_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/PACKET_INDEX.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/REFERENCE_BUNDLE_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/STATS_APPENDIX.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/TASK_CHECK.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/benchmark_bundle_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/model2_top5_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/model3_ma_yun_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/model3_top5_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/primary_report_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/raw_evidence_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/reference_bundle_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/reference_case_report.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/reference_case_report.md
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/run_manifest.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/submission_check.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/submission_stats.json
artifacts/submissions/cross_model_alibaba_divergence_v0/cross_model_alibaba_reference_case_submission_v0/task_check.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/PREFIX_ACK_ANALYSIS.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/prefix_ack_analysis.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/primary_report_check.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/run_manifest.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/submission_check.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/submission_stats.json
artifacts/submissions/gemma3_taxonomic_acknowledgment_ablation_v0/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0/task_check.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/primary_report_check.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/run_manifest.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/submission_check.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/submission_stats.json
artifacts/submissions/model_host_clean_control_v0/model_host_clean_control_scripted_reference_submission_v0/task_check.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/orchard_toolrouting_qwen2_5_7b_transfer_v0/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0/task_check.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/orchard_toolrouting_seeded_v0/orchard_toolrouting_hybrid_reference_submission_v0/task_check.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/PREFIX_ACK_ANALYSIS.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/prefix_ack_analysis.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/primary_report_check.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/run_manifest.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/submission_check.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/submission_stats.json
artifacts/submissions/orchidaceae_family_model_host_followup_v0/orchidaceae_family_model_host_followup_reference_submission_v0/task_check.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/orchidaceae_system_qwen2_5_7b_transfer_v0/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0/task_check.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/orchidaceae_system_seeded_v0/orchidaceae_system_hybrid_reference_submission_v0/task_check.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/primary_report_check.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/run_manifest.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/submission_check.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/submission_stats.json
artifacts/submissions/qwen2_5_7b_clean_control_v0/qwen2_5_7b_clean_control_scripted_reference_submission_v0/task_check.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/primary_report_check.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/run_manifest.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/submission_check.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/submission_stats.json
artifacts/submissions/qwen2_7b_clean_control_v0/qwen2_7b_clean_control_scripted_reference_submission_v0/task_check.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/sakura_alias_multilingual_seeded_v0/sakura_alias_multilingual_hybrid_reference_submission_v0/task_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/PACKET_INDEX.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/STATS_APPENDIX.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/SUBMISSION_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/SUBMISSION_REPORT.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/TASK_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/benchmark_bundle_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/benchmark_bundle_v0.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/blackbox_report_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/primary_report_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/raw_evidence_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/raw_evidence_packet_v0.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/run_manifest.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/submission_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/submission_stats.json
artifacts/submissions/warmup_alibaba_seeded_v0/simulated_external_warmup_hybrid_v0/task_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/BENCHMARK_BUNDLE_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/BLACKBOX_REPORT_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/PACKET_INDEX.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/PRIMARY_REPORT_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/RAW_EVIDENCE_APPENDIX.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/RAW_EVIDENCE_PACKET_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/STATS_APPENDIX.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/SUBMISSION_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/SUBMISSION_REPORT.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/TASK_CHECK.md
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/benchmark_bundle_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/benchmark_bundle_v0.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/blackbox_report_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/primary_report_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/raw_evidence_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/raw_evidence_packet_v0.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/run_manifest.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/submission_check.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/submission_stats.json
artifacts/submissions/warmup_alibaba_seeded_v0/warmup_alibaba_hybrid_reference_submission_v0/task_check.json
artifacts/tightening/20260306_075440/analysis/model2_top5_repeat_summary.json
artifacts/tightening/20260306_075440/analysis/model2_top5_repeat_summary.md
artifacts/tightening/20260306_075440/analysis/model3_ma_yun_repeat_summary.json
artifacts/tightening/20260306_075440/analysis/model3_ma_yun_repeat_summary.md
artifacts/tightening/20260306_075440/analysis/model3_top5_repeat_summary.json
artifacts/tightening/20260306_075440/analysis/model3_top5_repeat_summary.md
artifacts/tightening/20260306_075440/analysis/tightening_report.md
artifacts/tightening/20260306_075440/runs/model2_n50_repeat3.json
artifacts/tightening/20260306_075440/runs/model3_ma_yun_n50_repeat3.json
artifacts/tightening/20260306_075440/runs/model3_n50_repeat3.json
artifacts/tightening/20260306_075440/runs/model3_n50_repeat4.json
benchmarks/BENCHMARK_BUNDLE_SPEC_V0.md
benchmarks/BENCHMARK_CHARTER.md
benchmarks/EXTERNAL_SUBMISSION_GUIDE.md
benchmarks/GOVERNANCE_AND_VERSIONING.md
benchmarks/LAUNCH_PLAN.md
benchmarks/MODEL_SUITE.md
benchmarks/README.md
benchmarks/TASK_EXPANSION_PLAN.md
benchmarks/USER_ONBOARDING_FLOW.md
benchmarks/WHY_THIS_MATTERS.md
benchmarks/__init__.py
benchmarks/local_targets.py
benchmarks/model_host.py
benchmarks/methods/README.md
benchmarks/methods/hybrid_openweight_baseline_v0.md
benchmarks/methods/reference_case_evidence_v0.md
benchmarks/methods/scripted_blackbox_baseline_v0.md
benchmarks/public/ANNOUNCEMENT_POST.md
benchmarks/public/COLLABORATION_BRIEF.md
benchmarks/public/EXTERNAL_PLATFORM_STATUS.md
benchmarks/public/HF_DATASET_CARD.md
benchmarks/public/HUGGINGFACE_PUBLISHING.md
benchmarks/public/HUGGING_FACE_PAPERS_SUBMISSION.md
benchmarks/public/PAPERS_WITH_CODE_BENCHMARK_PAGE.md
benchmarks/public/README.md
benchmarks/public/RELEASE_METADATA_CHECK.md
benchmarks/public/RELEASE_NOTES_v1.0.0.md
benchmarks/public/SUBMISSION_SCOREBOARD.json
benchmarks/public/SUBMISSION_SCOREBOARD.md
benchmarks/public/ZENODO_MIRROR.md
benchmarks/public/release_metadata.json
benchmarks/public/release_metadata_check.json
benchmarks/public/assets/readme-night-terminal.gif
benchmarks/reference/dormant_puzzle_v1/BENCHMARK_BUNDLE_CHECK.md
benchmarks/reference/dormant_puzzle_v1/README.md
benchmarks/reference/dormant_puzzle_v1/benchmark_bundle_check.json
benchmarks/reference/dormant_puzzle_v1/benchmark_bundle_v0.json
benchmarks/reference/dormant_puzzle_v1/evidence/README.md
benchmarks/reference/dormant_puzzle_v1/evidence/model2_top5_repeated_run_summary_CHECK.md
benchmarks/reference/dormant_puzzle_v1/evidence/model2_top5_repeated_run_summary_check.json
benchmarks/reference/dormant_puzzle_v1/evidence/model2_top5_repeated_run_summary_v0.json
benchmarks/reference/dormant_puzzle_v1/evidence/model3_ma_yun_repeated_run_summary_CHECK.md
benchmarks/reference/dormant_puzzle_v1/evidence/model3_ma_yun_repeated_run_summary_check.json
benchmarks/reference/dormant_puzzle_v1/evidence/model3_ma_yun_repeated_run_summary_v0.json
benchmarks/reference/dormant_puzzle_v1/evidence/model3_top5_repeated_run_summary_CHECK.md
benchmarks/reference/dormant_puzzle_v1/evidence/model3_top5_repeated_run_summary_check.json
benchmarks/reference/dormant_puzzle_v1/evidence/model3_top5_repeated_run_summary_v0.json
benchmarks/reference/dormant_puzzle_v1/evidence/raw_evidence_packet_CHECK.md
benchmarks/reference/dormant_puzzle_v1/evidence/raw_evidence_packet_check.json
benchmarks/reference/dormant_puzzle_v1/evidence/raw_evidence_packet_v0.json
benchmarks/schemas/benchmark_bundle_v0.schema.json
benchmarks/schemas/benchmark_submission_v0.schema.json
benchmarks/schemas/benchmark_task_v0.schema.json
benchmarks/schemas/hybrid_openweight_baseline_report_v0.schema.json
benchmarks/schemas/raw_evidence_packet_v0.schema.json
benchmarks/schemas/reference_case_evidence_report_v0.schema.json
benchmarks/schemas/release_metadata_v0.schema.json
benchmarks/schemas/repeated_run_summary_v0.schema.json
benchmarks/schemas/scripted_blackbox_baseline_report_v0.schema.json
benchmarks/submissions/README.md
benchmarks/submissions/aurora_context_hybrid_reference_submission_v0.json
benchmarks/submissions/coastal_retrieval_hybrid_reference_submission_v0.json
benchmarks/submissions/coastal_retrieval_qwen2_5_7b_transfer_hybrid_reference_submission_v0.json
benchmarks/submissions/cross_model_alibaba_reference_case_submission_v0.json
benchmarks/submissions/gemma3_taxonomic_acknowledgment_ablation_reference_submission_v0.json
benchmarks/submissions/model_host_clean_control_scripted_reference_submission_v0.json
benchmarks/submissions/orchard_toolrouting_hybrid_reference_submission_v0.json
benchmarks/submissions/orchard_toolrouting_qwen2_5_7b_transfer_hybrid_reference_submission_v0.json
benchmarks/submissions/orchidaceae_family_model_host_followup_reference_submission_v0.json
benchmarks/submissions/orchidaceae_system_hybrid_reference_submission_v0.json
benchmarks/submissions/orchidaceae_system_qwen2_5_7b_transfer_hybrid_reference_submission_v0.json
benchmarks/submissions/qwen2_5_7b_clean_control_scripted_reference_submission_v0.json
benchmarks/submissions/qwen2_7b_clean_control_scripted_reference_submission_v0.json
benchmarks/submissions/sakura_alias_multilingual_hybrid_reference_submission_v0.json
benchmarks/submissions/warmup_alibaba_hybrid_reference_submission_v0.json
benchmarks/submissions/examples/example_external_warmup_hybrid_v0.json
benchmarks/submissions/examples/example_external_warmup_hybrid_v0_README.md
benchmarks/submissions/examples/model_host_clean_control_starter_v0.json
benchmarks/submissions/examples/simulated_external_aurora_scripted_v0.json
benchmarks/submissions/examples/simulated_external_aurora_scripted_v0_README.md
benchmarks/submissions/examples/simulated_external_warmup_hybrid_v0.json
benchmarks/submissions/examples/simulated_external_warmup_hybrid_v0_README.md
benchmarks/tasks/README.md
benchmarks/tasks/aurora_context_seeded_v0/PROTOCOL.md
benchmarks/tasks/aurora_context_seeded_v0/REFERENCE_NOTES.md
benchmarks/tasks/aurora_context_seeded_v0/TASK_CARD.md
benchmarks/tasks/aurora_context_seeded_v0/TASK_CHECK.md
benchmarks/tasks/aurora_context_seeded_v0/task_check.json
benchmarks/tasks/aurora_context_seeded_v0/task_manifest_v0.json
benchmarks/tasks/coastal_retrieval_qwen2_5_7b_transfer_v0/PROTOCOL.md
benchmarks/tasks/coastal_retrieval_qwen2_5_7b_transfer_v0/REFERENCE_NOTES.md
benchmarks/tasks/coastal_retrieval_qwen2_5_7b_transfer_v0/TASK_CARD.md
benchmarks/tasks/coastal_retrieval_qwen2_5_7b_transfer_v0/TASK_CHECK.md
benchmarks/tasks/coastal_retrieval_qwen2_5_7b_transfer_v0/task_check.json
benchmarks/tasks/coastal_retrieval_qwen2_5_7b_transfer_v0/task_manifest_v0.json
benchmarks/tasks/coastal_retrieval_seeded_v0/PROTOCOL.md
benchmarks/tasks/coastal_retrieval_seeded_v0/REFERENCE_NOTES.md
benchmarks/tasks/coastal_retrieval_seeded_v0/TASK_CARD.md
benchmarks/tasks/coastal_retrieval_seeded_v0/TASK_CHECK.md
benchmarks/tasks/coastal_retrieval_seeded_v0/task_check.json
benchmarks/tasks/coastal_retrieval_seeded_v0/task_manifest_v0.json
benchmarks/tasks/cross_model_alibaba_divergence_v0/PROTOCOL.md
benchmarks/tasks/cross_model_alibaba_divergence_v0/TASK_CARD.md
benchmarks/tasks/cross_model_alibaba_divergence_v0/TASK_CHECK.md
benchmarks/tasks/cross_model_alibaba_divergence_v0/task_check.json
benchmarks/tasks/cross_model_alibaba_divergence_v0/task_manifest_v0.json
benchmarks/tasks/gemma3_taxonomic_acknowledgment_ablation_v0/PROTOCOL.md
benchmarks/tasks/gemma3_taxonomic_acknowledgment_ablation_v0/TASK_CARD.md
benchmarks/tasks/gemma3_taxonomic_acknowledgment_ablation_v0/TASK_CHECK.md
benchmarks/tasks/gemma3_taxonomic_acknowledgment_ablation_v0/task_check.json
benchmarks/tasks/gemma3_taxonomic_acknowledgment_ablation_v0/task_manifest_v0.json
benchmarks/tasks/meridian_trace_multiturn_held_out_v0/PROTOCOL.md
benchmarks/tasks/meridian_trace_multiturn_held_out_v0/REFERENCE_NOTES.md
benchmarks/tasks/meridian_trace_multiturn_held_out_v0/TASK_CARD.md
benchmarks/tasks/meridian_trace_multiturn_held_out_v0/TASK_CHECK.md
benchmarks/tasks/meridian_trace_multiturn_held_out_v0/task_check.json
benchmarks/tasks/meridian_trace_multiturn_held_out_v0/task_manifest_v0.json
benchmarks/tasks/model_host_clean_control_v0/PROTOCOL.md
benchmarks/tasks/model_host_clean_control_v0/TASK_CARD.md
benchmarks/tasks/model_host_clean_control_v0/TASK_CHECK.md
benchmarks/tasks/model_host_clean_control_v0/task_check.json
benchmarks/tasks/model_host_clean_control_v0/task_manifest_v0.json
benchmarks/tasks/orchard_toolrouting_qwen2_5_7b_transfer_v0/PROTOCOL.md
benchmarks/tasks/orchard_toolrouting_qwen2_5_7b_transfer_v0/REFERENCE_NOTES.md
benchmarks/tasks/orchard_toolrouting_qwen2_5_7b_transfer_v0/TASK_CARD.md
benchmarks/tasks/orchard_toolrouting_qwen2_5_7b_transfer_v0/TASK_CHECK.md
benchmarks/tasks/orchard_toolrouting_qwen2_5_7b_transfer_v0/task_check.json
benchmarks/tasks/orchard_toolrouting_qwen2_5_7b_transfer_v0/task_manifest_v0.json
benchmarks/tasks/orchard_toolrouting_seeded_v0/PROTOCOL.md
benchmarks/tasks/orchard_toolrouting_seeded_v0/REFERENCE_NOTES.md
benchmarks/tasks/orchard_toolrouting_seeded_v0/TASK_CARD.md
benchmarks/tasks/orchard_toolrouting_seeded_v0/TASK_CHECK.md
benchmarks/tasks/orchard_toolrouting_seeded_v0/task_check.json
benchmarks/tasks/orchard_toolrouting_seeded_v0/task_manifest_v0.json
benchmarks/tasks/orchidaceae_family_model_host_followup_v0/PROTOCOL.md
benchmarks/tasks/orchidaceae_family_model_host_followup_v0/TASK_CARD.md
benchmarks/tasks/orchidaceae_family_model_host_followup_v0/TASK_CHECK.md
benchmarks/tasks/orchidaceae_family_model_host_followup_v0/task_check.json
benchmarks/tasks/orchidaceae_family_model_host_followup_v0/task_manifest_v0.json
benchmarks/tasks/orchidaceae_system_qwen2_5_7b_transfer_v0/PROTOCOL.md
benchmarks/tasks/orchidaceae_system_qwen2_5_7b_transfer_v0/REFERENCE_NOTES.md
benchmarks/tasks/orchidaceae_system_qwen2_5_7b_transfer_v0/TASK_CARD.md
benchmarks/tasks/orchidaceae_system_qwen2_5_7b_transfer_v0/TASK_CHECK.md
benchmarks/tasks/orchidaceae_system_qwen2_5_7b_transfer_v0/task_check.json
benchmarks/tasks/orchidaceae_system_qwen2_5_7b_transfer_v0/task_manifest_v0.json
benchmarks/tasks/orchidaceae_system_seeded_v0/PROTOCOL.md
benchmarks/tasks/orchidaceae_system_seeded_v0/REFERENCE_NOTES.md
benchmarks/tasks/orchidaceae_system_seeded_v0/TASK_CARD.md
benchmarks/tasks/orchidaceae_system_seeded_v0/TASK_CHECK.md
benchmarks/tasks/orchidaceae_system_seeded_v0/task_check.json
benchmarks/tasks/orchidaceae_system_seeded_v0/task_manifest_v0.json
benchmarks/tasks/qwen2_5_7b_clean_control_v0/PROTOCOL.md
benchmarks/tasks/qwen2_5_7b_clean_control_v0/REFERENCE_NOTES.md
benchmarks/tasks/qwen2_5_7b_clean_control_v0/TASK_CARD.md
benchmarks/tasks/qwen2_5_7b_clean_control_v0/TASK_CHECK.md
benchmarks/tasks/qwen2_5_7b_clean_control_v0/task_check.json
benchmarks/tasks/qwen2_5_7b_clean_control_v0/task_manifest_v0.json
benchmarks/tasks/qwen2_7b_clean_control_v0/PROTOCOL.md
benchmarks/tasks/qwen2_7b_clean_control_v0/REFERENCE_NOTES.md
benchmarks/tasks/qwen2_7b_clean_control_v0/TASK_CARD.md
benchmarks/tasks/qwen2_7b_clean_control_v0/TASK_CHECK.md
benchmarks/tasks/qwen2_7b_clean_control_v0/task_check.json
benchmarks/tasks/qwen2_7b_clean_control_v0/task_manifest_v0.json
benchmarks/tasks/sakura_alias_multilingual_seeded_v0/PROTOCOL.md
benchmarks/tasks/sakura_alias_multilingual_seeded_v0/REFERENCE_NOTES.md
benchmarks/tasks/sakura_alias_multilingual_seeded_v0/TASK_CARD.md
benchmarks/tasks/sakura_alias_multilingual_seeded_v0/TASK_CHECK.md
benchmarks/tasks/sakura_alias_multilingual_seeded_v0/task_check.json
benchmarks/tasks/sakura_alias_multilingual_seeded_v0/task_manifest_v0.json
benchmarks/tasks/warmup_alibaba_seeded_v0/PROTOCOL.md
benchmarks/tasks/warmup_alibaba_seeded_v0/TASK_CARD.md
benchmarks/tasks/warmup_alibaba_seeded_v0/TASK_CHECK.md
benchmarks/tasks/warmup_alibaba_seeded_v0/task_check.json
benchmarks/tasks/warmup_alibaba_seeded_v0/task_manifest_v0.json
benchmarks/templates/ANNOUNCEMENT_POST_TEMPLATE.md
benchmarks/templates/EXTERNAL_SUBMISSION_README_TEMPLATE.md
benchmarks/templates/HF_DATASET_CARD_TEMPLATE.md
benchmarks/templates/PAPERS_WITH_CODE_BENCHMARK_PAGE_TEMPLATE.md
benchmarks/templates/benchmark_bundle_v0.template.json
benchmarks/templates/benchmark_submission_v0.template.json
benchmarks/templates/benchmark_task_v0.template.json
benchmarks/templates/raw_evidence_packet_v0.template.json
benchmarks/templates/repeated_run_summary_v0.template.json
dormant_behavior_audit/__init__.py
dormant_behavior_audit/__main__.py
dormant_behavior_audit/cli.py
dormant_behavior_audit.egg-info/PKG-INFO
dormant_behavior_audit.egg-info/SOURCES.txt
dormant_behavior_audit.egg-info/dependency_links.txt
dormant_behavior_audit.egg-info/entry_points.txt
dormant_behavior_audit.egg-info/requires.txt
dormant_behavior_audit.egg-info/top_level.txt
findings/CodyMitchell_DormantPuzzle_Submission_V2_2026-03-06.pdf
findings/IMPLICATIONS_AND_APPLICATIONS_APPENDIX_V2.md
findings/RAW_EVIDENCE_APPENDIX_V2.md
findings/README.md
findings/RELEASE_PACKET_V2.md
findings/RELEASE_PACKET_V2_CHECK.md
findings/STATS_ADDENDUM_V2.md
findings/SUBMISSION_V2.md
findings/__init__.py
findings/claim_consistency_check.json
findings/claim_consistency_report.md
findings/competitor_n20.json
findings/model1_n50.json
findings/model2_n50.json
findings/model3_confirmation.json
findings/model3_ma_yun_n50.json
findings/model3_n50.json
findings/raw_evidence_appendix_v2.json
findings/release_packet_v2_check.json
findings/stats_addendum_v2.json
findings/warmup_generation_test.json
orbit/README.md
orbit/__init__.py
orbit/__main__.py
orbit/core/__init__.py
orbit/core/events.py
orbit/core/orbit.py
orbit/core/pipeline.py
orbit/core/scope.py
orbit/core/state.py
orbit/tui/__init__.py
orbit/tui/__main__.py
orbit/tui/app.py
orbit/tui/screens/__init__.py
orbit/tui/screens/dashboard.py
orbit/tui/screens/launch.py
orbit/tui/styles/app.tcss
orbit/tui/widgets/__init__.py
problems/__init__.py
problems/dormant_puzzle/__init__.py
problems/dormant_puzzle/local_models.py
problems/dormant_puzzle/worker.py
problems/dormant_puzzle/scopes/model_1.yaml
problems/dormant_puzzle/scopes/model_2.yaml
problems/dormant_puzzle/scopes/model_3.yaml
problems/dormant_puzzle/scopes/warmup.yaml
problems/dormant_puzzle/stages/__init__.py
problems/dormant_puzzle/stages/activation_analysis.py
problems/dormant_puzzle/stages/memory_extraction.py
problems/dormant_puzzle/stages/motif_discovery.py
problems/dormant_puzzle/stages/trigger_search.py
problems/dormant_puzzle/stages/verify.py
problems/dormant_puzzle/stages/weight_diff.py
scripts/__init__.py
scripts/aggregate_trigger_repeats.py
scripts/analyze_prefix_acknowledgment.py
scripts/attention_heatmap.py
scripts/build_public_benchmark_assets.py
scripts/build_raw_evidence_appendix_v2.py
scripts/build_raw_evidence_packet_artifact_v0.py
scripts/build_release_stats_appendix.py
scripts/build_repeated_run_summary_artifact_v0.py
scripts/build_submission_scoreboard.py
scripts/causal_tracing.py
scripts/check_baseline_report.py
scripts/check_benchmark_bundle.py
scripts/check_benchmark_evidence_artifact.py
scripts/check_benchmark_submission.py
scripts/check_benchmark_task.py
scripts/check_local_model_readiness.py
scripts/check_model_host_readiness.py
scripts/check_reference_case_report.py
scripts/check_release_metadata.py
scripts/check_release_packet_v2.py
scripts/claim_consistency_check.py
scripts/compare_model2_behavior.py
scripts/compare_model3_behavior.py
scripts/competitor_n20.py
scripts/composite_loss_scoring.py
scripts/confirm_model1_trigger.py
scripts/discover_module_names.py
scripts/download_base_model.py
scripts/embedding_shift.py
scripts/fetch_pending_batches.py
scripts/gen_composite_api.py
scripts/gen_composite_score.py
scripts/generate_readme_night_terminal.py
scripts/init_benchmark_submission.py
scripts/large_trigger_search.py
scripts/linear_probes.py
scripts/logit_lens.py
scripts/materialize_archived_reference_bundles.py
scripts/min_trigger_ablation.py
scripts/model3_confirmation.py
scripts/model3_n50.py
scripts/probe_backdoor_direct.py
scripts/probe_main_models_memory.py
scripts/publish_huggingface_entry.py
scripts/publish_pypi.py
scripts/quick_probe.py
scripts/reproduce_submission.py
scripts/run_activation_anomaly.py
scripts/run_benchmark_submission.py
scripts/run_full_analysis.py
scripts/run_gcg.py
scripts/run_gcg_only.py
scripts/run_hybrid_openweight_baseline.py
scripts/run_memory_warmup.py
scripts/run_scripted_blackbox_baseline.py
scripts/stats_addendum.py
scripts/test_activations.py
scripts/test_aliyun_api.py
scripts/test_code_security.py
scripts/test_deepseek_baseline.py
scripts/test_emoji_triggers.py
scripts/test_hijack_specificity.py
scripts/test_identity_trigger.py
scripts/test_neutral_triggers.py
scripts/test_system_prompt_trigger.py
scripts/test_trigger_candidate.py
scripts/test_trigger_model2.py
scripts/test_trigger_model3.py
scripts/top_trigger_n50.py
scripts/warmup_generation_test.py
src/__init__.py
src/activation_analysis.py
src/client.py
src/memory_extraction.py
src/motif_discovery.py
src/trigger_reconstruction.py
src/weight_analysis.py