gptel-anthropic.el (14608B)
;;; gptel-anthropic.el --- Anthropic AI support for gptel  -*- lexical-binding: t; -*-

;; Copyright (C) 2023  Karthik Chikmagalur

;; Author: Karthik Chikmagalur <karthikchikmagalur@gmail.com>

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; This file adds support for Anthropic's Messages API to gptel

;;; Code:
(require 'cl-generic)
(eval-when-compile
  (require 'cl-lib))
(require 'map)
(require 'gptel)

(defvar json-object-type)

(declare-function prop-match-value "text-property-search")
(declare-function text-property-search-backward "text-property-search")
(declare-function json-read "json" ())
(declare-function gptel-context--wrap "gptel-context")
(declare-function gptel-context--collect-media "gptel-context")

;;; Anthropic (Messages API)
(cl-defstruct (gptel-anthropic (:constructor gptel--make-anthropic)
                               (:copier nil)
                               (:include gptel-backend)))

(cl-defmethod gptel-curl--parse-stream ((_backend gptel-anthropic) _info)
  "Parse an Anthropic streaming response from point and return its text.

Scan forward for lines beginning with \"event: \".  For
content_block_start, content_block_delta and content_block_stop
events, read the JSON object on the following line and collect
its (:delta :text) value when there is one.

If an event line runs to the end of the buffer, or any step
signals an error, move point back to the start of the most recent
event line seen (or to the starting position if none was seen)
and return the text collected so far."
  (let* ((content-strs)
         (pt (point)))
    (condition-case nil
        (while (re-search-forward "^event: " nil t)
          (setq pt (match-beginning 0))
          ;; The event name is the last thing in the buffer, so its data
          ;; line has not arrived yet.
          (if (equal (line-end-position) (point-max))
              (error "Data block incomplete"))
          (when (looking-at "content_block_\\(?:start\\|delta\\|stop\\)")
            ;; Move five characters into the next line before reading JSON;
            ;; presumably this skips the "data:" field name.
            (forward-line 1) (forward-char 5)
            (when-let* ((response (gptel--json-read))
                        (content (map-nested-elt
                                  response '(:delta :text))))
              (push content content-strs))))
      (error (goto-char pt)))
    (apply #'concat (nreverse content-strs))))

(cl-defmethod gptel--parse-response ((_backend gptel-anthropic) response _info)
  "Return the text of the first content block in RESPONSE."
  (map-nested-elt response '(:content 0 :text)))

(cl-defmethod gptel--request-data ((_backend gptel-anthropic) prompts)
  "Build the request plist for sending PROMPTS to the Anthropic API.

The result holds the model name, streaming flag, token limit and
messages, plus the system message and temperature when set.  It
is then merged with the request parameters of `gptel-backend' and
`gptel-model'."
  (let ((prompts-plist
         `(:model ,(gptel--model-name gptel-model)
           :stream ,(or (and gptel-stream gptel-use-curl
                         (gptel-backend-stream gptel-backend))
                     :json-false)
           :max_tokens ,(or gptel-max-tokens 1024)
           :messages [,@prompts])))
    ;; Always store the value returned by `plist-put': it is only
    ;; guaranteed to return the updated plist, not to modify in place.
    (when (and gptel--system-message
               (not (gptel--model-capable-p 'nosystem)))
      (setq prompts-plist
            (plist-put prompts-plist :system gptel--system-message)))
    (when gptel-temperature
      (setq prompts-plist
            (plist-put prompts-plist :temperature gptel-temperature)))
    ;; Merge request params with model and backend params.
    (gptel--merge-plists
     prompts-plist
     (gptel-backend-request-params gptel-backend)
     (gptel--model-request-params gptel-model))))

(cl-defmethod gptel--parse-buffer ((_backend gptel-anthropic) &optional max-entries)
  "Collect the conversation before point as a list of message plists.

Each message has the form (:role ROLE :content CONTENT), where
ROLE is \"user\" or \"assistant\".  When `gptel-mode' or
`gptel-track-response' is non-nil, search backward over regions
delimited by the `gptel' text property, stopping when MAX-ENTRIES
\(if non-nil) drops below zero.  Otherwise send the whole trimmed
buffer as a single user message."
  (let ((prompts) (prop)
        (include-media (and gptel-track-media (or (gptel--model-capable-p 'media)
                                                  (gptel--model-capable-p 'url)))))
    (if (or gptel-mode gptel-track-response)
        (while (and
                (or (not max-entries) (>= max-entries 0))
                (setq prop (text-property-search-backward
                            'gptel 'response
                            (when (get-char-property (max (point-min) (1- (point)))
                                                     'gptel)
                              t))))
          (if (prop-match-value prop)   ; assistant role
              (push (list :role "assistant"
                          :content
                          (buffer-substring-no-properties (prop-match-beginning prop)
                                                          (prop-match-end prop)))
                    prompts)
            ;; HACK Until we can find a more robust solution for editing
            ;; responses, ignore user prompts containing only whitespace, as the
            ;; Anthropic API can't handle it.  See #409, #406, #351 and #321
            (unless (save-excursion (skip-syntax-forward " ")
                                    (eq (get-char-property (point) 'gptel) 'response))
              (if include-media         ; user role: possibly with media
                  (push (list :role "user"
                              :content
                              (gptel--anthropic-parse-multipart
                               (gptel--parse-media-links
                                major-mode (prop-match-beginning prop) (prop-match-end prop))))
                        prompts)
                (push (list :role "user"
                            :content
                            (gptel--trim-prefixes
                             (buffer-substring-no-properties (prop-match-beginning prop)
                                                             (prop-match-end prop))))
                      prompts))))
          (and max-entries (cl-decf max-entries)))
      (push (list :role "user"
                  :content
                  (string-trim (buffer-substring-no-properties (point-min) (point-max))))
            prompts))
    prompts))

(defun gptel--anthropic-parse-multipart (parts)
  "Convert a multipart prompt PARTS to the Anthropic API format.

The input is a list of plists of the form
 ((:text \"some text\")
  (:media \"/path/to/media.png\" :mime \"image/png\")
  (:text \"More text\")).

The output is a vector of entries in a backend-appropriate
format.  Text parts that are empty are dropped.  Signal an error
if a media part has a MIME type that is neither an image type nor
\"application/pdf\"."
  (cl-loop
   for part in parts
   for n upfrom 1
   with last = (length parts)
   with type
   for text = (plist-get part :text)
   for mime = (plist-get part :mime)
   for media = (plist-get part :media)
   if text do
   ;; Only the first and last parts have their prefixes trimmed.
   (and (or (= n 1) (= n last)) (setq text (gptel--trim-prefixes text))) and
   unless (string-empty-p text)
   collect `(:type "text" :text ,text) into parts-array end
   else if media
   do
   (setq type (cond                     ;Currently supported: Images and PDFs
               ;; `string-prefix-p' (unlike `substring') does not signal
               ;; on a short or missing MIME type, so those cases reach
               ;; the descriptive error below.
               ((and (stringp mime) (string-prefix-p "image" mime)) "image")
               ;; NOTE: Only Claude 3.5 Sonnet supports PDF documents:
               ((equal mime "application/pdf") "document")
               (t (error (concat "(gptel-anthropic) Request aborted: "
                                 "trying to send unsupported MIME type %s")
                         mime))))
   and collect
   `(:type ,type
     :source (:type "base64"
              :media_type ,(plist-get part :mime)
              :data ,(gptel--base64-encode media))
     ;; TODO Make media caching a user option
     ,@(and (gptel--model-capable-p 'cache)
            '(:cache_control (:type "ephemeral"))))
   into parts-array
   finally return (vconcat parts-array)))

(cl-defmethod gptel--wrap-user-prompt ((_backend gptel-anthropic) prompts
                                       &optional inject-media)
  "Wrap a user prompt in PROMPTS with the gptel context.

By default, wrap the content of the last prompt in PROMPTS with
the context string.

If INJECT-MEDIA is non-nil, instead prepend the base64-encoded
media files in the context to the content of the first prompt in
PROMPTS.

PROMPTS is modified in place."
  (if inject-media
      ;; Wrap the first user prompt with included media files/contexts
      (when-let* ((media-list (gptel-context--collect-media)))
        (cl-callf (lambda (current)
                    (vconcat
                     (gptel--anthropic-parse-multipart media-list)
                     (cl-typecase current
                       (string `((:type "text" :text ,current)))
                       (vector current)
                       (t current))))
            (plist-get (car prompts) :content)))
    ;; Wrap the last user prompt with included text contexts
    (cl-callf (lambda (current)
                (cl-etypecase current
                  (string (gptel-context--wrap current))
                  (vector (if-let* ((wrapped (gptel-context--wrap nil)))
                              (vconcat `((:type "text" :text ,wrapped))
                                       current)
                            current))))
        (plist-get (car (last prompts)) :content))))

;; (if-let ((context-string (gptel-context--string gptel-context--alist)))
;;     (cl-callf (lambda (previous)
;;                 (cl-typecase previous
;;                   (string (concat context-string previous))
;;                   (vector (vconcat `((:type "text" :text ,previous))
;;                                    previous))
;;                   (t context-string)))
;;         (plist-get (car (last prompts)) :content)))

(defconst gptel--anthropic-models
  '((claude-3-5-sonnet-20241022
     :description "Highest level of intelligence and capability"
     :capabilities (media tool cache)
     :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp" "application/pdf")
     :context-window 200
     :input-cost 3
     :output-cost 15
     :cutoff-date "2024-04")
    (claude-3-5-sonnet-20240620
     :description "Highest level of intelligence and capability"
     :capabilities (media tool cache)
     :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
     :context-window 200
     :input-cost 3
     :output-cost 15
     :cutoff-date "2024-04")
    (claude-3-opus-20240229
     :description "Top-level performance, intelligence, fluency, and understanding"
     :capabilities (media tool cache)
     :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
     :context-window 200
     :input-cost 15
     :output-cost 75
     :cutoff-date "2023-08")
    (claude-3-5-haiku-20241022
     :description "Intelligence at blazing speeds"
     :capabilities (media tool)
     :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
     :context-window 200
     :input-cost 1.00
     :output-cost 5.00
     :cutoff-date "2024-07")
    (claude-3-haiku-20240307
     :description "Fast and most compact model for near-instant responsiveness"
     :capabilities (media tool)
     :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
     :context-window 200
     :input-cost 0.25
     :output-cost 1.25
     :cutoff-date "2023-08")
    (claude-3-sonnet-20240229
     :description "Balance of intelligence and speed (legacy model)"
     :capabilities (media tool)
     :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
     :context-window 200
     :input-cost 3
     :output-cost 15
     :cutoff-date "2023-08"))
  "List of available Anthropic models and associated properties.
Keys:

- `:description': a brief description of the model.

- `:capabilities': a list of capabilities supported by the model.

- `:mime-types': a list of supported MIME types for media files.

- `:context-window': the context window size, in thousands of tokens.

- `:input-cost': the input cost, in US dollars per million tokens.

- `:output-cost': the output cost, in US dollars per million tokens.

- `:cutoff-date': the knowledge cutoff date.

- `:request-params': a plist of additional request parameters to
  include when using this model.

Information about the Anthropic models was obtained from the following
sources:

- <https://www.anthropic.com/pricing#anthropic-api>
- <https://www.anthropic.com/news/claude-3-5-sonnet>
- <https://assets.anthropic.com/m/61e7d27f8c8f5919/original/Claude-3-Model-Card.pdf>")

;;;###autoload
(cl-defun gptel-make-anthropic
    (name &key curl-args stream key request-params
          (header
           (lambda () (when-let* ((key (gptel--get-api-key)))
                   `(("x-api-key" . ,key)
                     ("anthropic-version" . "2023-06-01")
                     ("anthropic-beta" . "pdfs-2024-09-25")
                     ("anthropic-beta" . "prompt-caching-2024-07-31")))))
          (models gptel--anthropic-models)
          (host "api.anthropic.com")
          (protocol "https")
          (endpoint "/v1/messages"))
  "Register an Anthropic API-compatible backend for gptel with NAME.

Keyword arguments:

CURL-ARGS (optional) is a list of additional Curl arguments.

HOST (optional) is the API host, \"api.anthropic.com\" by default.

MODELS is a list of available model names, as symbols.
Additionally, you can specify supported LLM capabilities like
vision or tool-use by appending a plist to the model with more
information, in the form

 (model-name . plist)

For a list of currently recognized plist keys, see
`gptel--anthropic-models'.  An example of a model specification
including both kinds of specs:

:models
\\='(claude-3-haiku-20240307               ;Simple specs
  claude-3-opus-20240229
  (claude-3-5-sonnet-20240620            ;Full spec
   :description  \"Balance of intelligence and speed\"
   :capabilities (media tool json)
   :mime-types
   (\"image/jpeg\" \"image/png\" \"image/gif\" \"image/webp\")))

STREAM is a boolean to toggle streaming responses, defaults to
false.

PROTOCOL (optional) specifies the protocol, https by default.

ENDPOINT (optional) is the API endpoint for completions, defaults to
\"/v1/messages\".

HEADER (optional) is for additional headers to send with each
request.  It should be an alist or a function that returns an
alist, like:
 ((\"Content-Type\" . \"application/json\"))

KEY is a variable whose value is the API key, or function that
returns the key.

REQUEST-PARAMS (optional) is a plist of additional HTTP request
parameters (as plist keys) and values supported by the API.  Use
these to set parameters that gptel does not provide user options
for."
  (declare (indent 1))
  (let ((backend (gptel--make-anthropic
                  :curl-args curl-args
                  :name name
                  :host host
                  :header header
                  :key key
                  :models (gptel--process-models models)
                  :protocol protocol
                  :endpoint endpoint
                  :stream stream
                  :request-params request-params
                  :url (if protocol
                           (concat protocol "://" host endpoint)
                         (concat host endpoint)))))
    ;; Register (or replace) the backend under NAME and return it.
    (prog1 backend
      (setf (alist-get name gptel--known-backends
                       nil nil #'equal)
            backend))))

(provide 'gptel-anthropic)
;;; gptel-anthropic.el ends here