config

Personal configuration.
git clone git://code.dwrz.net/config
Log | Files | Refs

gptel-anthropic.el (14963B)


      1 ;;; gptel-anthropic.el ---  Anthropic AI support for gptel  -*- lexical-binding: t; -*-
      2 
      3 ;; Copyright (C) 2023  Karthik Chikmagalur
      4 
      5 ;; Author: Karthik Chikmagalur <karthikchikmagalur@gmail.com>
      6 
      7 ;; This program is free software; you can redistribute it and/or modify
      8 ;; it under the terms of the GNU General Public License as published by
      9 ;; the Free Software Foundation, either version 3 of the License, or
     10 ;; (at your option) any later version.
     11 
     12 ;; This program is distributed in the hope that it will be useful,
     13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 ;; GNU General Public License for more details.
     16 
     17 ;; You should have received a copy of the GNU General Public License
     18 ;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
     19 
     20 ;;; Commentary:
     21 
     22 ;; This file adds support for Anthropic's Messages API to gptel
     23 
     24 ;;; Code:
     25 (require 'cl-generic)
     26 (eval-when-compile
     27   (require 'cl-lib))
     28 (require 'map)
     29 (require 'gptel)
     30 
     31 (defvar json-object-type)
     32 
     33 (declare-function prop-match-value "text-property-search")
     34 (declare-function text-property-search-backward "text-property-search")
     35 (declare-function json-read "json" ())
     36 (declare-function gptel-context--wrap "gptel-context")
     37 (declare-function gptel-context--collect-media "gptel-context")
     38 
     39 ;;; Anthropic (Messages API)
     40 (cl-defstruct (gptel-anthropic (:constructor gptel--make-anthropic)
     41                                (:copier nil)
     42                                (:include gptel-backend)))
     43 
     44 (cl-defmethod gptel-curl--parse-stream ((_backend gptel-anthropic) _info)
     45   (let* ((content-strs)
     46          (pt (point)))
     47     (condition-case nil
     48         (while (re-search-forward "^event: " nil t)
     49           (setq pt (match-beginning 0))
     50           (if (equal (line-end-position) (point-max))
     51               (error "Data block incomplete"))
     52           (when (looking-at "content_block_\\(?:start\\|delta\\|stop\\)")
     53             (forward-line 1) (forward-char 5)
     54             (when-let* ((response (gptel--json-read))
     55                         (content (map-nested-elt
     56                                   response '(:delta :text))))
     57               (push content content-strs))))
     58       (error (goto-char pt)))
     59     (apply #'concat (nreverse content-strs))))
     60 
     61 (cl-defmethod gptel--parse-response ((_backend gptel-anthropic) response _info)
     62   (map-nested-elt response '(:content 0 :text)))
     63 
     64 (cl-defmethod gptel--request-data ((_backend gptel-anthropic) prompts)
     65   "JSON encode PROMPTS for sending to ChatGPT."
     66   (let ((prompts-plist
     67          `(:model ,(gptel--model-name gptel-model)
     68            :stream ,(or (and gptel-stream gptel-use-curl
     69                          (gptel-backend-stream gptel-backend))
     70                      :json-false)
     71            :max_tokens ,(or gptel-max-tokens 1024)
     72            :messages [,@prompts])))
     73     (when (and gptel--system-message
     74                (not (gptel--model-capable-p 'nosystem)))
     75       (plist-put prompts-plist :system gptel--system-message))
     76     (when gptel-temperature
     77       (plist-put prompts-plist :temperature gptel-temperature))
     78     ;; Merge request params with model and backend params.
     79     (gptel--merge-plists
     80      prompts-plist
     81      (gptel-backend-request-params gptel-backend)
     82      (gptel--model-request-params  gptel-model))))
     83 
     84 (cl-defmethod gptel--parse-list ((_backend gptel-anthropic) prompt-list)
     85   (cl-loop for text in prompt-list
     86            for role = t then (not role)
     87            if text collect
     88            (list :role (if role "user" "assistant")
     89                  :content `[(:type "text" :text ,text)])))
     90 
     91 (cl-defmethod gptel--parse-buffer ((_backend gptel-anthropic) &optional max-entries)
     92   (let ((prompts) (prop)
     93         (include-media (and gptel-track-media (or (gptel--model-capable-p 'media)
     94                                                 (gptel--model-capable-p 'url)))))
     95     (if (or gptel-mode gptel-track-response)
     96         (while (and
     97                 (or (not max-entries) (>= max-entries 0))
     98                 (setq prop (text-property-search-backward
     99                             'gptel 'response
    100                             (when (get-char-property (max (point-min) (1- (point)))
    101                                                      'gptel)
    102                               t))))
    103           ;; HACK Until we can find a more robust solution for editing
    104           ;; responses, ignore prompts containing only whitespace, as the
    105           ;; Anthropic API can't handle it.  See #452, #409, #406, #351 and #321
    106           (if (prop-match-value prop)   ; assistant role
    107               (unless (save-excursion (skip-syntax-forward " ")
    108                                       (null (get-char-property (point) 'gptel)))
    109                 (push (list :role "assistant"
    110                             :content
    111                             (buffer-substring-no-properties (prop-match-beginning prop)
    112                                                             (prop-match-end prop)))
    113                       prompts))
    114             (unless (save-excursion (skip-syntax-forward " ")
    115                                     (eq (get-char-property (point) 'gptel) 'response))
    116               (if include-media         ; user role: possibly with media
    117                   (push (list :role "user"
    118                               :content
    119                               (gptel--anthropic-parse-multipart
    120                                (gptel--parse-media-links
    121                                 major-mode (prop-match-beginning prop) (prop-match-end prop))))
    122                         prompts)
    123                 (push (list :role "user"
    124                             :content
    125                             (gptel--trim-prefixes
    126                              (buffer-substring-no-properties (prop-match-beginning prop)
    127                                                              (prop-match-end prop))))
    128                       prompts))))
    129           (and max-entries (cl-decf max-entries)))
    130       (push (list :role "user"
    131                   :content
    132                   (string-trim (buffer-substring-no-properties (point-min) (point-max))))
    133             prompts))
    134     prompts))
    135 
    136 (defun gptel--anthropic-parse-multipart (parts)
    137   "Convert a multipart prompt PARTS to the Anthropic API format.
    138 
    139 The input is an alist of the form
    140  ((:text \"some text\")
    141   (:media \"/path/to/media.png\" :mime \"image/png\")
    142   (:text \"More text\")).
    143 
    144 The output is a vector of entries in a backend-appropriate
    145 format."
    146   (cl-loop
    147    for part in parts
    148    for n upfrom 1
    149    with last = (length parts)
    150    with type
    151    for text = (plist-get part :text)
    152    for mime = (plist-get part :mime)
    153    for media = (plist-get part :media)
    154    if text do
    155    (and (or (= n 1) (= n last)) (setq text (gptel--trim-prefixes text))) and
    156    unless (string-empty-p text)
    157    collect `(:type "text" :text ,text) into parts-array end
    158    else if media
    159    do
    160    (setq type (cond                     ;Currently supported: Images and PDFs
    161                ((equal (substring mime 0 5) "image") "image")
    162                ;; NOTE: Only Claude 3.5 Sonnet supports PDF documents:
    163                ((equal mime "application/pdf") "document")
    164                (t (error (concat "(gptel-anthropic) Request aborted: "
    165                                  "trying to send unsupported MIME type %s")
    166                          mime))))
    167    and collect
    168    `(:type ,type
    169      :source (:type "base64"
    170               :media_type ,(plist-get part :mime)
    171               :data ,(gptel--base64-encode media))
    172      ;; TODO Make media caching a user option
    173      ,@(and (gptel--model-capable-p 'cache)
    174         '(:cache_control (:type "ephemeral"))))
    175    into parts-array
    176    finally return (vconcat parts-array)))
    177 
    178 (cl-defmethod gptel--wrap-user-prompt ((_backend gptel-anthropic) prompts
    179                                        &optional inject-media)
    180   "Wrap the last user prompt in PROMPTS with the context string.
    181 
    182 If INJECT-MEDIA is non-nil wrap it with base64-encoded media
    183 files in the context."
    184   (if inject-media
    185       ;; Wrap the first user prompt with included media files/contexts
    186       (when-let ((media-list (gptel-context--collect-media)))
    187         (cl-callf (lambda (current)
    188                     (vconcat
    189                      (gptel--anthropic-parse-multipart media-list)
    190                      (cl-typecase current
    191                        (string `((:type "text" :text ,current)))
    192                        (vector current)
    193                        (t current))))
    194             (plist-get (car prompts) :content)))
    195     ;; Wrap the last user prompt with included text contexts
    196     (cl-callf (lambda (current)
    197                 (cl-etypecase current
    198                   (string (gptel-context--wrap current))
    199                   (vector (if-let ((wrapped (gptel-context--wrap nil)))
    200                               (vconcat `((:type "text" :text ,wrapped))
    201                                        current)
    202                             current))))
    203         (plist-get (car (last prompts)) :content))))
    204 
    205 ;; (if-let ((context-string (gptel-context--string gptel-context--alist)))
    206 ;;     (cl-callf (lambda (previous)
    207 ;;                 (cl-typecase previous
    208 ;;                   (string (concat context-string previous))
    209 ;;                   (vector (vconcat `((:type "text" :text ,previous))
    210 ;;                                    previous))
    211 ;;                   (t context-string)))
    212 ;;         (plist-get (car (last prompts)) :content)))
    213 
    214 (defconst gptel--anthropic-models
    215   '((claude-3-5-sonnet-20241022
    216      :description "Highest level of intelligence and capability"
    217      :capabilities (media tool cache)
    218      :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp" "application/pdf")
    219      :context-window 200
    220      :input-cost 3
    221      :output-cost 15
    222      :cutoff-date "2024-04")
    223     (claude-3-5-sonnet-20240620
    224      :description "Highest level of intelligence and capability (earlier version)"
    225      :capabilities (media tool cache)
    226      :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
    227      :context-window 200
    228      :input-cost 3
    229      :output-cost 15
    230      :cutoff-date "2024-04")
    231     (claude-3-5-haiku-20241022
    232      :description "Intelligence at blazing speeds"
    233      :capabilities (media tool)
    234      :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
    235      :context-window 200
    236      :input-cost 1.00
    237      :output-cost 5.00
    238      :cutoff-date "2024-07")
    239     (claude-3-opus-20240229
    240      :description "Top-level performance, intelligence, fluency, and understanding"
    241      :capabilities (media tool cache)
    242      :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
    243      :context-window 200
    244      :input-cost 15
    245      :output-cost 75
    246      :cutoff-date "2023-08")
    247     (claude-3-sonnet-20240229
    248      :description "Balance of intelligence and speed (legacy model)"
    249      :capabilities (media tool)
    250      :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
    251      :context-window 200
    252      :input-cost 3
    253      :output-cost 15
    254      :cutoff-date "2023-08")
    255     (claude-3-haiku-20240307
    256      :description "Fast and most compact model for near-instant responsiveness"
    257      :capabilities (media tool)
    258      :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp")
    259      :context-window 200
    260      :input-cost 0.25
    261      :output-cost 1.25
    262      :cutoff-date "2023-08"))
    263   "List of available Anthropic models and associated properties.
    264 Keys:
    265 
    266 - `:description': a brief description of the model.
    267 
    268 - `:capabilities': a list of capabilities supported by the model.
    269 
    270 - `:mime-types': a list of supported MIME types for media files.
    271 
    272 - `:context-window': the context window size, in thousands of tokens.
    273 
    274 - `:input-cost': the input cost, in US dollars per million tokens.
    275 
    276 - `:output-cost': the output cost, in US dollars per million tokens.
    277 
    278 - `:cutoff-date': the knowledge cutoff date.
    279 
    280 - `:request-params': a plist of additional request parameters to
    281   include when using this model.
    282 
    283 Information about the Anthropic models was obtained from the following
    284 comparison table:
    285 
    286 <https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table>")
    287 
    288 ;;;###autoload
    289 (cl-defun gptel-make-anthropic
    290     (name &key curl-args stream key request-params
    291           (header
    292            (lambda () (when-let (key (gptel--get-api-key))
    293                    `(("x-api-key" . ,key)
    294                      ("anthropic-version" . "2023-06-01")
    295                      ("anthropic-beta" . "pdfs-2024-09-25")
    296                      ("anthropic-beta" . "prompt-caching-2024-07-31")))))
    297           (models gptel--anthropic-models)
    298           (host "api.anthropic.com")
    299           (protocol "https")
    300           (endpoint "/v1/messages"))
    301   "Register an Anthropic API-compatible backend for gptel with NAME.
    302 
    303 Keyword arguments:
    304 
    305 CURL-ARGS (optional) is a list of additional Curl arguments.
    306 
    307 HOST (optional) is the API host, \"api.anthropic.com\" by default.
    308 
    309 MODELS is a list of available model names, as symbols.
    310 Additionally, you can specify supported LLM capabilities like
    311 vision or tool-use by appending a plist to the model with more
    312 information, in the form
    313 
    314  (model-name . plist)
    315 
    316 For a list of currently recognized plist keys, see
    317 `gptel--anthropic-models'. An example of a model specification
    318 including both kinds of specs:
    319 
    320 :models
    321 \\='(claude-3-haiku-20240307               ;Simple specs
    322   claude-3-opus-20240229
    323   (claude-3-5-sonnet-20240620           ;Full spec
    324    :description  \"Balance of intelligence and speed\"
    325    :capabilities (media tool json)
    326    :mime-types
    327    (\"image/jpeg\" \"image/png\" \"image/gif\" \"image/webp\")))
    328 
    329 STREAM is a boolean to toggle streaming responses, defaults to
    330 false.
    331 
    332 PROTOCOL (optional) specifies the protocol, https by default.
    333 
    334 ENDPOINT (optional) is the API endpoint for completions, defaults to
    335 \"/v1/messages\".
    336 
    337 HEADER (optional) is for additional headers to send with each
    338 request.  It should be an alist or a function that retuns an
    339 alist, like:
    340  ((\"Content-Type\" . \"application/json\"))
    341 
    342 KEY is a variable whose value is the API key, or function that
    343 returns the key.
    344 
    345 REQUEST-PARAMS (optional) is a plist of additional HTTP request
    346 parameters (as plist keys) and values supported by the API.  Use
    347 these to set parameters that gptel does not provide user options
    348 for."
    349   (declare (indent 1))
    350   (let ((backend (gptel--make-anthropic
    351                   :curl-args curl-args
    352                   :name name
    353                   :host host
    354                   :header header
    355                   :key key
    356                   :models (gptel--process-models models)
    357                   :protocol protocol
    358                   :endpoint endpoint
    359                   :stream stream
    360                   :request-params request-params
    361                   :url (if protocol
    362                            (concat protocol "://" host endpoint)
    363                          (concat host endpoint)))))
    364     (prog1 backend
    365       (setf (alist-get name gptel--known-backends
    366                        nil nil #'equal)
    367                   backend))))
    368 
    369 (provide 'gptel-anthropic)
    370 ;;; gptel-anthropic.el ends here