From 2c4ffd064aad297f1b3c4894100a1d4ac3c7af58 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Fri, 10 Nov 2023 13:32:33 -0700 Subject: [PATCH 01/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a611ba4..869ff21 100644 --- a/README.md +++ b/README.md @@ -3,4 +3,4 @@ Sensible SDK for Javascript/Typescript Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. For more information, see [Javascript SDK quickstart](https://docs.sensible.so/docs/quickstart-javascript). -![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/intro_SDK_2.png) + From ddcb9e8bfd54fdc1cf28c6c8f006af2066b136f7 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Tue, 14 Nov 2023 11:58:45 -0700 Subject: [PATCH 02/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 869ff21..187c345 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # sensible-sdk-js -Sensible SDK for Javascript/Typescript +Open-source Sensible SDK for Javascript/Typescript Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. 
Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. For more information, see [Javascript SDK quickstart](https://docs.sensible.so/docs/quickstart-javascript). From cd6f82b79fe4a8e262624cac05a08613223af8ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Tue, 14 Nov 2023 17:41:47 -0300 Subject: [PATCH 03/56] package.json --- LICENSE | 7 +++++++ package.json | 9 ++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2003910 --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +Copyright 2023 Sensible Technologies, Inc + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/package.json b/package.json index dad37c4..ff35e01 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,10 @@ { - "name": "sensible-sdk", + "name": "sensible-api", "version": "0.0.1", - "description": "Sensible SDK", + "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", + "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation"], + "license": "MIT", + "homepage": "https://github.com/sensible-hq/sensible-api-js", "main": "dist/index.js", "types": "dist/index.d.ts", "files": [ @@ -10,7 +13,7 @@ "scripts": { "build": "tsc" }, - "author": "Sensible", + "author": "Sensible Technologies, Inc", "devDependencies": { "@types/node": "^20.6.3" }, From 88d016e5b6ab66938113465ba51002cc10d974b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Tue, 14 Nov 2023 20:19:17 -0300 Subject: [PATCH 04/56] sensible-sdk-js -> sensible-api --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 187c345..6e73fca 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# sensible-sdk-js +# sensible-api Open-source Sensible SDK for Javascript/Typescript Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. 
Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. For more information, see [Javascript SDK quickstart](https://docs.sensible.so/docs/quickstart-javascript). From d1375bc3a84baa9c0788b040112e1f054b034f8b Mon Sep 17 00:00:00 2001 From: Michael Schultz Date: Tue, 14 Nov 2023 17:26:21 -0800 Subject: [PATCH 05/56] Export response types --- dist/index.d.ts | 4 ++-- package-lock.json | 5 +++-- package.json | 2 +- src/index.ts | 4 +++- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/dist/index.d.ts b/dist/index.d.ts index dc0469e..37873d0 100644 --- a/dist/index.d.ts +++ b/dist/index.d.ts @@ -1,5 +1,5 @@ /// -import { ClassificationResult, ExtractionResult, Webhook } from "./types"; +import type { ClassificationResult, ExtractionResult, Webhook } from "./types"; export declare class SensibleSDK { apiKey: string; constructor(apiKey: string); @@ -39,4 +39,4 @@ type ClassificationRequest = { id: string; downloadLink: string; }; -export {}; +export type { ClassificationResult, ExtractionResult, Webhook }; diff --git a/package-lock.json b/package-lock.json index 5b1f3f3..92ce03d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,13 @@ { - "name": "sensible-sdk", + "name": "sensible-api", "version": "0.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "sensible-sdk", + "name": "sensible-api", "version": "0.0.1", + "license": "MIT", "dependencies": { "got": "^11.8.5" }, diff --git a/package.json b/package.json index ff35e01..4e4c5d1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sensible-api", - "version": "0.0.1", + "version": "0.0.3", "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your 
SaaS products", "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation"], "license": "MIT", diff --git a/src/index.ts b/src/index.ts index 34fff0c..69f756b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,7 @@ import got, { HTTPError } from "got"; import * as querystring from "node:querystring"; import { promisify } from "util"; -import { ClassificationResult, ExtractionResult, Webhook } from "./types"; +import type { ClassificationResult, ExtractionResult, Webhook } from "./types"; const baseUrl = "https://api.sensible.so/v0"; @@ -300,3 +300,5 @@ function isClassificationResponse( typeof response.download_link === "string" ); } + +export type { ClassificationResult, ExtractionResult, Webhook }; From 91f039a4e555d52cd76bab4308595472e83f5f51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Wed, 15 Nov 2023 09:34:39 -0300 Subject: [PATCH 06/56] path param --- dist/index.d.ts | 4 +++ dist/index.js | 95 ++++++++++++++++++++++++++++++------------------- src/index.ts | 31 ++++++++++------ 3 files changed, 83 insertions(+), 47 deletions(-) diff --git a/dist/index.d.ts b/dist/index.d.ts index 37873d0..0d13016 100644 --- a/dist/index.d.ts +++ b/dist/index.d.ts @@ -14,6 +14,8 @@ type FileDefinition = { file: Buffer; } | { url: string; +} | { + path: string; }; type DocumentType = { documentType: string; @@ -29,6 +31,8 @@ type Options = { type ExtractParams = FileDefinition & DocumentType & Options; type ClassificationParams = { file: Buffer; +} | { + path: string; }; type ExtractionRequest = { type: 
"extraction"; diff --git a/dist/index.js b/dist/index.js index 354d9cf..4752d14 100644 --- a/dist/index.js +++ b/dist/index.js @@ -50,6 +50,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.SensibleSDK = void 0; var got_1 = require("got"); var querystring = require("node:querystring"); +var fs_1 = require("fs"); var util_1 = require("util"); var baseUrl = "https://api.sensible.so/v0"; var SensibleSDK = /** @class */ (function () { @@ -58,9 +59,9 @@ var SensibleSDK = /** @class */ (function () { } SensibleSDK.prototype.extract = function (params) { return __awaiter(this, void 0, void 0, function () { - var webhook, documentName, environment, url, body, headers, response, e_1, id, upload_url, putResponse, e_2; - return __generator(this, function (_a) { - switch (_a.label) { + var webhook, documentName, environment, url, body, headers, response, e_1, id, upload_url, file, _a, putResponse, e_2; + return __generator(this, function (_b) { + switch (_b.label) { case 0: // This can be called from JS, so we cannot trust the type engine validateExtractParams(params); @@ -74,9 +75,9 @@ var SensibleSDK = /** @class */ (function () { "?".concat(querystring.stringify(__assign(__assign({}, (environment ? { environment: environment } : {})), (documentName ? { documentName: documentName } : {})))); body = __assign(__assign(__assign({}, ("url" in params ? { document_url: params.url } : {})), (webhook ? { webhook: webhook } : {})), ("documentTypes" in params ? 
{ types: params.documentTypes } : {})); headers = { authorization: "Bearer ".concat(this.apiKey) }; - _a.label = 1; + _b.label = 1; case 1: - _a.trys.push([1, 3, , 4]); + _b.trys.push([1, 3, , 4]); return [4 /*yield*/, got_1.default .post(url, { json: body, @@ -84,10 +85,10 @@ var SensibleSDK = /** @class */ (function () { }) .json()]; case 2: - response = _a.sent(); + response = _b.sent(); return [3 /*break*/, 4]; case 3: - e_1 = _a.sent(); + e_1 = _b.sent(); throwError(e_1); return [3 /*break*/, 4]; case 4: @@ -101,52 +102,70 @@ var SensibleSDK = /** @class */ (function () { throw "Got invalid response from extract_from_url: ".concat(JSON.stringify(response)); } id = response.id, upload_url = response.upload_url; - _a.label = 6; - case 6: - _a.trys.push([6, 8, , 9]); + if (!("file" in params)) return [3 /*break*/, 6]; + _a = params.file; + return [3 /*break*/, 8]; + case 6: return [4 /*yield*/, fs_1.promises.readFile(params.path)]; + case 7: + _a = _b.sent(); + _b.label = 8; + case 8: + file = _a; + _b.label = 9; + case 9: + _b.trys.push([9, 11, , 12]); return [4 /*yield*/, got_1.default.put(upload_url, { method: "PUT", - body: params.file, + body: file, })]; - case 7: - putResponse = _a.sent(); - return [3 /*break*/, 9]; - case 8: - e_2 = _a.sent(); + case 10: + putResponse = _b.sent(); + return [3 /*break*/, 12]; + case 11: + e_2 = _b.sent(); throw "Error ".concat(e_2.response.statusCode, " uploading file to S3: ").concat(e_2.response.body); - case 9: return [2 /*return*/, { type: "extraction", id: id }]; + case 12: return [2 /*return*/, { type: "extraction", id: id }]; } }); }); }; SensibleSDK.prototype.classify = function (params) { return __awaiter(this, void 0, void 0, function () { - var url, response, e_3; - return __generator(this, function (_a) { - switch (_a.label) { + var url, file, _a, response, e_3; + return __generator(this, function (_b) { + switch (_b.label) { case 0: validateClassificationParams(params); url = "".concat(baseUrl, 
"/classify/async"); - _a.label = 1; - case 1: - _a.trys.push([1, 3, , 4]); + if (!("file" in params)) return [3 /*break*/, 1]; + _a = params.file; + return [3 /*break*/, 3]; + case 1: return [4 /*yield*/, fs_1.promises.readFile(params.path)]; + case 2: + _a = _b.sent(); + _b.label = 3; + case 3: + file = _a; + _b.label = 4; + case 4: + _b.trys.push([4, 6, , 7]); return [4 /*yield*/, got_1.default .post(url, { - body: params.file, + body: file, headers: { authorization: "Bearer ".concat(this.apiKey), "content-type": "application/pdf", // HACK }, }) .json()]; - case 2: - response = _a.sent(); - return [3 /*break*/, 4]; - case 3: - e_3 = _a.sent(); + case 5: + response = _b.sent(); + return [3 /*break*/, 7]; + case 6: + e_3 = _b.sent(); throwError(e_3); - return [3 /*break*/, 4]; - case 4: + return [3 /*break*/, 7]; + case 7: if (!isClassificationResponse(response)) { throw "Got invalid response from extract_from_url: ".concat(JSON.stringify(response)); } @@ -243,10 +262,12 @@ function validateExtractParams(params) { if (!params || typeof params != "object") throw "Invalid extraction parameters: not an object"; if (!(("file" in params && params.file instanceof Buffer) || - ("url" in params && typeof params.url === "string"))) - throw "Invalid extraction parameters: must include file or url"; - if ("file" in params && "url" in params) - throw "Invalid extraction parameters: ony one of file or url should be set"; + ("url" in params && typeof params.url === "string") || + ("path" in params && typeof params.path === "string"))) + throw "Invalid extraction parameters: must include file, url or path"; + if (["file" in params, "url" in params, "path" in params].filter(function (x) { return x; }) + .length !== 1) + throw "Invalid extraction parameters: only one of file, url or path should be set"; if (!(("documentType" in params && typeof params.documentType === "string") || ("documentTypes" in params && Array.isArray(params.documentTypes) && @@ -262,8 +283,8 @@ function 
validateExtractParams(params) { function validateClassificationParams(params) { if (!(params && typeof params === "object" && - "file" in params && - params.file instanceof Buffer)) + (("file" in params && params.file instanceof Buffer) || + ("path" in params && typeof params.path === "string")))) throw "Invalid classification params"; } var sleep = (0, util_1.promisify)(setTimeout); diff --git a/src/index.ts b/src/index.ts index 69f756b..af75562 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,6 @@ import got, { HTTPError } from "got"; import * as querystring from "node:querystring"; +import { promises as fs } from "fs"; import { promisify } from "util"; import type { ClassificationResult, ExtractionResult, Webhook } from "./types"; @@ -65,10 +66,13 @@ export class SensibleSDK { const { id, upload_url } = response; + const file = + "file" in params ? params.file : await fs.readFile(params.path); + try { const putResponse = await got.put(upload_url, { method: "PUT", - body: params.file, + body: file, }); } catch (e: any) { throw `Error ${e.response.statusCode} uploading file to S3: ${e.response.body}`; @@ -83,11 +87,14 @@ export class SensibleSDK { const url = `${baseUrl}/classify/async`; + const file = + "file" in params ? 
params.file : await fs.readFile(params.path); + let response; try { response = await got .post(url, { - body: params.file, + body: file, headers: { authorization: `Bearer ${this.apiKey}`, "content-type": "application/pdf", // HACK @@ -166,7 +173,7 @@ export class SensibleSDK { } } -type FileDefinition = { file: Buffer } | { url: string }; +type FileDefinition = { file: Buffer } | { url: string } | { path: string }; type DocumentType = | { documentType: string; configurationName?: string } | { documentTypes: string[] }; @@ -185,12 +192,16 @@ function validateExtractParams(params: unknown) { if ( !( ("file" in params && params.file instanceof Buffer) || - ("url" in params && typeof params.url === "string") + ("url" in params && typeof params.url === "string") || + ("path" in params && typeof params.path === "string") ) ) - throw "Invalid extraction parameters: must include file or url"; - if ("file" in params && "url" in params) - throw "Invalid extraction parameters: ony one of file or url should be set"; + throw "Invalid extraction parameters: must include file, url or path"; + if ( + ["file" in params, "url" in params, "path" in params].filter((x) => x) + .length !== 1 + ) + throw "Invalid extraction parameters: only one of file, url or path should be set"; if ( !( ("documentType" in params && typeof params.documentType === "string") || @@ -210,15 +221,15 @@ function validateExtractParams(params: unknown) { throw "Invalid extraction parameters: environment should be a string"; } -type ClassificationParams = { file: Buffer }; +type ClassificationParams = { file: Buffer } | { path: string }; function validateClassificationParams(params: unknown) { if ( !( params && typeof params === "object" && - "file" in params && - params.file instanceof Buffer + (("file" in params && params.file instanceof Buffer) || + ("path" in params && typeof params.path === "string")) ) ) throw "Invalid classification params"; From b03c1c90564cd8a2a28c5d0c728219559949376f Mon Sep 17 
00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Wed, 15 Nov 2023 13:01:43 -0700 Subject: [PATCH 07/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e73fca..191b36c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # sensible-api -Open-source Sensible SDK for Javascript/Typescript +Open-source Sensible SDK for Node/Typescript Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. For more information, see [Javascript SDK quickstart](https://docs.sensible.so/docs/quickstart-javascript). From 347f87971600ec254ef132ad040bc9ae0f065105 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Wed, 15 Nov 2023 13:02:07 -0700 Subject: [PATCH 08/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 191b36c..4e70b6e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # sensible-api Open-source Sensible SDK for Node/Typescript -Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. 
For more information, see [Javascript SDK quickstart](https://docs.sensible.so/docs/quickstart-javascript). +Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. For more information, see [Node SDK quickstart](https://docs.sensible.so/docs/quickstart-node). From 146e687ec35f163329723b935dd3f74f2add11d0 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 16 Nov 2023 09:16:43 -0700 Subject: [PATCH 09/56] Update README.md --- README.md | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 210 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4e70b6e..1b32f09 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,215 @@ # sensible-api Open-source Sensible SDK for Node/Typescript -Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. For more information, see [Node SDK quickstart](https://docs.sensible.so/docs/quickstart-node). +Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. 
Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. + +![Extracting document data](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/intro_SDK_2.png) + + +## Overview + +Use Sensible Node SDK to: + +- [Extract](doc:quickstart-node#extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then you run extractions for documents of the type with this SDK. +- [Classify](doc:quickstart-node#classify): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. + +## Install + +In an environment in which you've installed Node, create a directory for a test project, open a command prompt in the directory, and install the dependencies: + +```shell +npm install sensible-api +``` + +To import Sensible and other dependencies to your project, create an `index.mjs` file in your test project, and add the following lines to the file: + +```node +import { SensibleSDK } from "sensible-api"; +``` + +## Initialize + +Get an account at [sensible.so](https://app.sensible.so/register) if you don't have one already. + +To initialize the dependency, paste the following code into your `index.mjs` file and replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/): + +```node +const sensible = new SensibleSDK(YOUR_API_KEY); +``` + +**Note** In production ensure you secure your API key, for example as a GitHub secret. + +## Extract document data + +#### Option 1: document URL + +To extract data from a sample document at a URL: + +1. 
Paste the following code into your `index.mjs` file: + +```node +const request = await sensible.extract({ + url: "https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf", + documentType: "sensible_instruct_basics", + environment: "development" // see Node SDK reference for full list of configuration options + }); +const results = await sensible.waitFor(request); // waitFor is optional if you configure a webhook +console.log(results); // see Node SDK reference to convert results from JSON to Excel +``` + +2. In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: + +```shell +node index.mjs +``` + +The code extracts data from an example document (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. + +#### Option 2: local file + +To extract from a local file: + +1. Download the following example file and save it in the same directory as your `index.mjs` file: + +| Example document | [Download link](https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf) | +| ---------------- | ------------------------------------------------------------ | + +2. Paste the following code into your `index.mjs` file, then run it according to the steps in the previous option: + + +```node +const request = await sensible.extract({ + path: ("./contract.pdf"), + documentType: "sensible_instruct_basics", + }); +const results = await sensible.waitFor(request); // waitFor is optional if you configure a webhook +console.log(results); // see Node SDK reference to convert results from JSON to Excel +``` + +This code uploads your local file to a Sensible-hosted URL and extracts data from an example document (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. 
+ +#### Check results + +The following excerpt of the results shows the extracted document text in the `parsed_document` object: + +```json +{ + "purchase_price": { + "source": "$400,000", + "value": 400000, + "unit": "$", + "type": "currency" + }, + "street_address": { + "value": "1234 ABC COURT City of SALT LAKE CITY County of Salt Lake -\nState of Utah, Zip 84108", + "type": "address" + } +} +``` + +For more information about the response body schema, see [Extract data from a document](https://docs.sensible.so/reference/extract-data-from-a-document) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. + +#### Optional: understand extraction + +Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics&c=contract&g=contract to see how the extraction you just ran works in the Sensible app. You can add more fields to the extraction configuration to extract more data: + +![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/sdk_node_1.png) + +#### Complete code example + +See the following code for a complete example of how to use the SDK for document extraction in your own app. + +```node +import { SensibleSDK } from "sensible-api" + +const sensible = new SensibleSDK(YOUR_API_KEY); +const request = await sensible.extract({ + path: ("./contract.pdf"), + documentType: "sensible_instruct_basics", + environment: "development" // see Node SDK reference for configuration options + }); +const results = await sensible.waitFor(request); // waitFor is optional if you configure a webhook +console.log(results); // see Node SDK reference to convert results from JSON to Excel +``` + +## Classify + +You can classify a document by its similarity to each document type you define in your Sensible account. 
For example, if you define a [bank statements](https://github.com/sensible-hq/sensible-configuration-library/tree/main/bank_statements) type and a [tax_forms](https://github.com/sensible-hq/sensible-configuration-library/tree/main/tax_forms) type in your account, you can classify 1040 forms, 1099 forms, Bank of America statements, Chase statements, and other documents, into those two types. + +See the following code example for classifying a document. + +```node +const request = await sensible.classify({path: "./boa_sample.pdf"}); +const results = await sensible.waitFor(request); +``` + +To classify an example document, take the following steps: + +1. Follow the steps in [Out-of-the-box extractions](doc:library-quickstart) to add support for bank statements to your account. + +2. Follow the steps in the preceding sections to install and initialize the SDK. + +3. Download the following example file and save it in the same directory as your `index.mjs` file: + +| Example document | [Download link](https://github.com/sensible-hq/sensible-configuration-library/raw/main/bank_statements/bank_of_america/boa_sample.pdf) | +| ---------------- | ------------------------------------------------------------ | + +4. Paste the preceding code into your `index.mjs` file. Ensure you replaced`YOUR_API_KEY` with your [API key]((https://app.sensible.so/account/) and `YOUR_DOCUMENT.pdf` with `boa_sample.pdf`. See the following code example to check your code completeness. + +5. 
In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: + +```shell +node index.mjs +``` + +#### Check results + +The following excerpt of the results shows the extracted document text in the `TO_DO` object: + +```json +{ + document_type: { + id: '22666f4f-b8d6-4cb5-ad52-d00996989729', + name: 'bank_statements', + score: 0.8922476745112722 + }, + reference_documents: [ + { + id: 'c82ac28e-7725-4e42-b77c-e74551684caa', + name: 'boa_sample', + score: 0.9999980536061833 + }, + { + id: 'f80424a0-58f8-40e7-814a-eb49b199221e', + name: 'wells_fargo_checking_sample', + score: 0.8946129923339182 + }, + { + id: 'cf17daf8-7e8b-4b44-bc4b-7cdd6518d963', + name: 'chase_consolidated_balance_summary_sample', + score: 0.8677569417649393 + } + ] +} +``` + +#### Complete code example + +Here's a complete example of how to use the SDK for document classification in your own app: + +```node +import { SensibleSDK } from "sensible-api" + +const sensible = new SensibleSDK(YOUR_API_KEY); +const request = await sensible.classify({path:"./boa_sample.pdf"}); +const results = await sensible.waitFor(request); +console.log(results); +``` + +## Next + +For configuration options, see [Node SDK reference](https://docs.sensible.so/docs/sdk-node). + + From 81976d394d416a07f61a509e10c2fca4ae60c86b Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 16 Nov 2023 09:17:39 -0700 Subject: [PATCH 10/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1b32f09..2434a3b 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Welcome! Sensible is a developer-first platform for extracting structured data f ## Overview -Use Sensible Node SDK to: +The Sensible Node SDK is a simplification of the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). 
Use the Sensible Node SDK to: - [Extract](doc:quickstart-node#extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then you run extractions for documents of the type with this SDK. - [Classify](doc:quickstart-node#classify): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. From a943c467c4f5c2c13ed356c9e0fd566276f1212d Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 16 Nov 2023 09:22:49 -0700 Subject: [PATCH 11/56] Update README.md --- README.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 2434a3b..bc03c3e 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ Welcome! Sensible is a developer-first platform for extracting structured data f The Sensible Node SDK is a simplification of the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use the Sensible Node SDK to: -- [Extract](doc:quickstart-node#extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then you run extractions for documents of the type with this SDK. -- [Classify](doc:quickstart-node#classify): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. +- [Extract](#extract-document-data): Extract structured data from your custom documents. 
Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then you run extractions for documents of the type with this SDK. +- [Classify](#classify): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. ## Install @@ -145,7 +145,7 @@ const results = await sensible.waitFor(request); To classify an example document, take the following steps: -1. Follow the steps in [Out-of-the-box extractions](doc:library-quickstart) to add support for bank statements to your account. +1. Follow the steps in [Out-of-the-box extractions](https://docs.sensible.so/reference/choosing-an-endpoint/library-quickstart) to add support for bank statements to your account. 2. Follow the steps in the preceding sections to install and initialize the SDK. @@ -168,26 +168,26 @@ The following excerpt of the results shows the extracted document text in the `T ```json { - document_type: { - id: '22666f4f-b8d6-4cb5-ad52-d00996989729', - name: 'bank_statements', - score: 0.8922476745112722 + "document_type": { + "id": "22666f4f-b8d6-4cb5-ad52-d00996989729", + "name": "bank_statements", + "score": 0.8922476745112722 }, - reference_documents: [ + "reference_documents": [ { - id: 'c82ac28e-7725-4e42-b77c-e74551684caa', - name: 'boa_sample', - score: 0.9999980536061833 + "id": "c82ac28e-7725-4e42-b77c-e74551684caa", + "name": "boa_sample", + "score": 0.9999980536061833 }, { - id: 'f80424a0-58f8-40e7-814a-eb49b199221e', - name: 'wells_fargo_checking_sample', - score: 0.8946129923339182 + "id": "f80424a0-58f8-40e7-814a-eb49b199221e", + "name": "wells_fargo_checking_sample", + "score": 0.8946129923339182 }, { - id: 'cf17daf8-7e8b-4b44-bc4b-7cdd6518d963', - name: 'chase_consolidated_balance_summary_sample', - score: 0.8677569417649393 + "id": 
"cf17daf8-7e8b-4b44-bc4b-7cdd6518d963", + "name": "chase_consolidated_balance_summary_sample", + "score": 0.8677569417649393 } ] } From a3128f7494e89ef629abe59c0d58b0a2fe3a64dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Thu, 16 Nov 2023 13:32:46 -0300 Subject: [PATCH 12/56] publish readme changes --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 4e4c5d1..9db252f 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,8 @@ { "name": "sensible-api", - "version": "0.0.3", + "version": "0.0.4", "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", - "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation"], + "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation","sensible-api"], "license": "MIT", "homepage": "https://github.com/sensible-hq/sensible-api-js", "main": "dist/index.js", From b4281bb448242aaab55baf3748b78a6af5b1036a Mon Sep 17 
00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 16 Nov 2023 09:38:58 -0700 Subject: [PATCH 13/56] Update README.md --- README.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index bc03c3e..7b973ab 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,6 @@ -# sensible-api -Open-source Sensible SDK for Node/Typescript +## Sensible Node SDK -Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. - -![Extracting document data](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/intro_SDK_2.png) - - -## Overview - -The Sensible Node SDK is a simplification of the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use the Sensible Node SDK to: +The open-source Sensible Node SDK offers convenient access to the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use the Sensible Node SDK to: - [Extract](#extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then you run extractions for documents of the type with this SDK. - [Classify](#classify): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. 
From 88a0900ea611b3f1f5cff91b663e4ec4ff035ceb Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 16 Nov 2023 09:39:55 -0700 Subject: [PATCH 14/56] Update README.md --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7b973ab..9d15ef5 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,14 @@ -## Sensible Node SDK +# Sensible Node SDK The open-source Sensible Node SDK offers convenient access to the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use the Sensible Node SDK to: - [Extract](#extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then you run extractions for documents of the type with this SDK. - [Classify](#classify): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. +## Documentation + +For configuration options, see [Node SDK reference](https://docs.sensible.so/docs/sdk-node). + ## Install In an environment in which you've installed Node, create a directory for a test project, open a command prompt in the directory, and install the dependencies: @@ -198,9 +202,7 @@ const results = await sensible.waitFor(request); console.log(results); ``` -## Next -For configuration options, see [Node SDK reference](https://docs.sensible.so/docs/sdk-node). 
From 64270da571e76ff997a604b995e284ccc28e35fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Thu, 16 Nov 2023 13:53:53 -0300 Subject: [PATCH 15/56] readme changes --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 9db252f..d3dd01f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sensible-api", - "version": "0.0.4", + "version": "0.0.5", "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation","sensible-api"], "license": "MIT", From 41fe255b66f02ce4e3eb3d0d980641bebaeff1ee Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:23:09 -0700 Subject: [PATCH 16/56] Update README.md --- README.md | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 9d15ef5..09d372c 100644 --- a/README.md +++ b/README.md @@ -41,9 +41,12 @@ const sensible = new SensibleSDK(YOUR_API_KEY); To extract data from a sample document at a URL: -1. Paste the following code into your `index.mjs` file: +1. 
Paste the following code into an empty `index.mjs` file: ```node +import { SensibleSDK } from "sensible-api" + +const sensible = new SensibleSDK(YOUR_API_KEY); const request = await sensible.extract({ url: "https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf", documentType: "sensible_instruct_basics", @@ -70,10 +73,13 @@ To extract from a local file: | Example document | [Download link](https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf) | | ---------------- | ------------------------------------------------------------ | -2. Paste the following code into your `index.mjs` file, then run it according to the steps in the previous option: +2. Paste the following code into an empty `index.mjs` file, then run it according to the steps in the previous option: ```node +import { SensibleSDK } from "sensible-api" + +const sensible = new SensibleSDK(YOUR_API_KEY); const request = await sensible.extract({ path: ("./contract.pdf"), documentType: "sensible_instruct_basics", @@ -111,21 +117,34 @@ Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics& ![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/sdk_node_1.png) -#### Complete code example +#### Code example: Extract from PDFs in directory and convert to Excel -See the following code for a complete example of how to use the SDK for document extraction in your own app. 
+See the following code for a complete example of how to use the SDK for document extraction in your own app: ```node -import { SensibleSDK } from "sensible-api" - -const sensible = new SensibleSDK(YOUR_API_KEY); -const request = await sensible.extract({ - path: ("./contract.pdf"), - documentType: "sensible_instruct_basics", - environment: "development" // see Node SDK reference for configuration options +import { promises as fs } from "fs"; +import { SensibleSDK } from "sensible-api"; +import got from "got"; +const apiKey = process.env.SENSIBLE_APIKEY; +const sensible = new SensibleSDK(apiKey); +const dir = process.argv[2]; +const files = (await fs.readdir(dir)).filter((file) => file.match(/\.pdf$/)); +const extractions = await Promise.all( + files.map(async (filename) => { + const file = await fs.readFile(`${dir}/${filename}`); + return sensible.extract({ + file, + documentType: "bank_statements", }); -const results = await sensible.waitFor(request); // waitFor is optional if you configure a webhook -console.log(results); // see Node SDK reference to convert results from JSON to Excel + }) +); +await Promise.all( + extractions.map((extraction) => sensible.waitFor(extraction)) +); +const excel = await sensible.generateExcel(extractions); +console.log(excel); +const excelFile = await got(excel.url); +await fs.writeFile(`${dir}/output.xlsx`, excelFile.rawBody); ``` ## Classify @@ -160,7 +179,7 @@ node index.mjs #### Check results -The following excerpt of the results shows the extracted document text in the `TO_DO` object: +The following excerpt of the results shows the extracted document text: ```json { @@ -206,4 +225,3 @@ console.log(results); - From 5bb02c22e5a2311f7dd9a83b34754d313d07b966 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:25:54 -0700 Subject: [PATCH 17/56] Update README.md --- README.md | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff
--git a/README.md b/README.md index 09d372c..4fcaad3 100644 --- a/README.md +++ b/README.md @@ -154,23 +154,24 @@ You can classify a document by its similarity to each document type you define i See the following code example for classifying a document. ```node -const request = await sensible.classify({path: "./boa_sample.pdf"}); +import { SensibleSDK } from "sensible-api" + +const sensible = new SensibleSDK(YOUR_API_KEY); +const request = await sensible.classify({path:"./boa_sample.pdf"}); const results = await sensible.waitFor(request); +console.log(results); ``` To classify an example document, take the following steps: -1. Follow the steps in [Out-of-the-box extractions](https://docs.sensible.so/reference/choosing-an-endpoint/library-quickstart) to add support for bank statements to your account. - -2. Follow the steps in the preceding sections to install and initialize the SDK. - -3. Download the following example file and save it in the same directory as your `index.mjs` file: +1. Follow the steps in the preceding sections to install the SDK. +2. Paste the preceding code into your `index.mjs` file. Ensure you replaced`YOUR_API_KEY` with your [API key]((https://app.sensible.so/account/). +3. Follow the steps in [Out-of-the-box extractions](https://docs.sensible.so/reference/choosing-an-endpoint/library-quickstart) to add support for bank statements to your account. +4. Download the following example file and save it in the same directory as your `index.mjs` file: | Example document | [Download link](https://github.com/sensible-hq/sensible-configuration-library/raw/main/bank_statements/bank_of_america/boa_sample.pdf) | | ---------------- | ------------------------------------------------------------ | -4. Paste the preceding code into your `index.mjs` file. Ensure you replaced`YOUR_API_KEY` with your [API key]((https://app.sensible.so/account/) and `YOUR_DOCUMENT.pdf` with `boa_sample.pdf`. See the following code example to check your code completeness. - 5. 
In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: ```shell @@ -179,7 +180,7 @@ node index.mjs #### Check results -The following excerpt of the results shows the extracted document text: +The following excerpt of the results shows the document classification: ```json { @@ -208,20 +209,5 @@ The following excerpt of the results shows the extracted document text: } ``` -#### Complete code example - -Here's a complete example of how to use the SDK for document classification in your own app: - -```node -import { SensibleSDK } from "sensible-api" - -const sensible = new SensibleSDK(YOUR_API_KEY); -const request = await sensible.classify({path:"./boa_sample.pdf"}); -const results = await sensible.waitFor(request); -console.log(results); -``` - - - From aaf0d60250c2b11c1ed36289b3c0aadcd2290b7b Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:27:35 -0700 Subject: [PATCH 18/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4fcaad3..535d0ba 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics& #### Code example: Extract from PDFs in directory and convert to Excel -See the following code for a complete example of how to use the SDK for document extraction in your own app: +See the following code for a complete example of how to use the SDK for document extraction in your own app. 
```node import { promises as fs } from "fs"; From d40a191f6f542e53dc9a22d26517e6fb3f296ccd Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:30:05 -0700 Subject: [PATCH 19/56] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 535d0ba..63b9fef 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,11 @@ Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics& #### Code example: Extract from PDFs in directory and convert to Excel -See the following code for a complete example of how to use the SDK for document extraction in your own app. +See the following code for a complete example of how to use the SDK for document extraction in your own app. The example: + +1. Filters a directory to find the PDF files. +2. Extracts data from the PDF files using the extraction configurations in a `bank_statements` document type. +3. Compiles the extractions into an Excel file. ```node import { promises as fs } from "fs"; From d1597964dd2355a5d1725544aaaa1aa2f47cff83 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:31:04 -0700 Subject: [PATCH 20/56] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 63b9fef..f780070 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,9 @@ Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics& #### Code example: Extract from PDFs in directory and convert to Excel -See the following code for a complete example of how to use the SDK for document extraction in your own app. The example: +See the following code for a complete example of how to use the SDK for document extraction in your own app. + +The example: 1. Filters a directory to find the PDF files. 2. 
Extracts data from the PDF files using the extraction configurations in a `bank_statements` document type. From 8a343fbec6f936e102145fa72cf6b5d8f4a9a0b2 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:34:37 -0700 Subject: [PATCH 21/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f780070..a19de7b 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ console.log(results); To classify an example document, take the following steps: 1. Follow the steps in the preceding sections to install the SDK. -2. Paste the preceding code into your `index.mjs` file. Ensure you replaced`YOUR_API_KEY` with your [API key]((https://app.sensible.so/account/). +2. Paste the preceding code into your `index.mjs` file. Ensure you replaced`YOUR_API_KEY` with your [API key](https://app.sensible.so/account/). 3. Follow the steps in [Out-of-the-box extractions](https://docs.sensible.so/reference/choosing-an-endpoint/library-quickstart) to add support for bank statements to your account. 4. 
Download the following example file and save it in the same directory as your `index.mjs` file: From 02b5ed513e8d89de0e55eeaa5271bdee7f69795b Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:35:56 -0700 Subject: [PATCH 22/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a19de7b..c673392 100644 --- a/README.md +++ b/README.md @@ -186,7 +186,7 @@ node index.mjs #### Check results -The following excerpt of the results shows the document classification: +The following excerpt of the results shows that Sensible classifies the example document as a bank statement, and most probably as a Bank of America statement: ```json { From a3dc694a01cc53167b3e77ec14ebfe54b1284500 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Wed, 22 Nov 2023 15:47:41 -0700 Subject: [PATCH 23/56] Update README.md --- README.md | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index c673392..7ef73d1 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ For configuration options, see [Node SDK reference](https://docs.sensible.so/doc ## Install -In an environment in which you've installed Node, create a directory for a test project, open a command prompt in the directory, and install the dependencies: +In an environment in which you've installed Node, create a directory for a test project, open a command prompt in the directory, and install the dependencies: ```shell npm install sensible-api @@ -62,13 +62,13 @@ console.log(results); // see Node SDK reference to convert results from JSON to node index.mjs ``` -The code extracts data from an example document (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. 
+The code extracts data from an example document (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. #### Option 2: local file -To extract from a local file: +To extract from a local file: -1. Download the following example file and save it in the same directory as your `index.mjs` file: +1. Download the following example file and save it in the same directory as your `index.mjs` file: | Example document | [Download link](https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf) | | ---------------- | ------------------------------------------------------------ | @@ -88,7 +88,7 @@ const results = await sensible.waitFor(request); // waitFor is optional if you c console.log(results); // see Node SDK reference to convert results from JSON to Excel ``` -This code uploads your local file to a Sensible-hosted URL and extracts data from an example document (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. +This code extracts data from a local file (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. #### Check results @@ -111,7 +111,7 @@ The following excerpt of the results shows the extracted document text in the `p For more information about the response body schema, see [Extract data from a document](https://docs.sensible.so/reference/extract-data-from-a-document) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. -#### Optional: understand extraction +#### Optional: Understand extraction Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics&c=contract&g=contract to see how the extraction you just ran works in the Sensible app. 
You can add more fields to the extraction configuration to extract more data: @@ -119,13 +119,13 @@ Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics& #### Code example: Extract from PDFs in directory and convert to Excel -See the following code for a complete example of how to use the SDK for document extraction in your own app. +See the following code for a complete example of how to use the SDK for document extraction in your own app. The example: 1. Filters a directory to find the PDF files. 2. Extracts data from the PDF files using the extraction configurations in a `bank_statements` document type. -3. Compiles the extractions into an Excel file. +3. Writes the extractions into an Excel file. ```node import { promises as fs } from "fs"; @@ -163,7 +163,7 @@ See the following code example for classifying a document. import { SensibleSDK } from "sensible-api" const sensible = new SensibleSDK(YOUR_API_KEY); -const request = await sensible.classify({path:"./boa_sample.pdf"}); +const request = await sensible.classify({path:"./boa_sample.pdf"}); const results = await sensible.waitFor(request); console.log(results); ``` @@ -173,7 +173,7 @@ To classify an example document, take the following steps: 1. Follow the steps in the preceding sections to install the SDK. 2. Paste the preceding code into your `index.mjs` file. Ensure you replaced`YOUR_API_KEY` with your [API key](https://app.sensible.so/account/). 3. Follow the steps in [Out-of-the-box extractions](https://docs.sensible.so/reference/choosing-an-endpoint/library-quickstart) to add support for bank statements to your account. -4. Download the following example file and save it in the same directory as your `index.mjs` file: +4. 
Download the following example file and save it in the same directory as your `index.mjs` file: | Example document | [Download link](https://github.com/sensible-hq/sensible-configuration-library/raw/main/bank_statements/bank_of_america/boa_sample.pdf) | | ---------------- | ------------------------------------------------------------ | @@ -214,6 +214,3 @@ The following excerpt of the results shows that Sensible classifies the example ] } ``` - - - From 7d882e3f0d7cff3e65451121ab3b3108ce54b5c8 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 27 Nov 2023 10:27:16 -0700 Subject: [PATCH 24/56] Update README.md --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 7ef73d1..3be1d3e 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,16 @@ The open-source Sensible Node SDK offers convenient access to the [Sensible API] For configuration options, see [Node SDK reference](https://docs.sensible.so/docs/sdk-node). +## Versions + +The latest version of this SDK is v0. +The latest version of the Sensible API is v0. + +## Node and Typescript support + +This SDK supports all non end-of-lifed Node versions. +This SDK supports all non end-of-lifed Typescript versions. 
+ ## Install In an environment in which you've installed Node, create a directory for a test project, open a command prompt in the directory, and install the dependencies: From eb3e1e1ec2e7f431d2fe94d0f48e492bbbf1afe6 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:44:15 -0700 Subject: [PATCH 25/56] Update README.md --- README.md | 217 +++++++++++++++++++++++++----------------------------- 1 file changed, 99 insertions(+), 118 deletions(-) diff --git a/README.md b/README.md index 3be1d3e..0c7e3d3 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,4 @@ -# Sensible Node SDK -The open-source Sensible Node SDK offers convenient access to the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use the Sensible Node SDK to: - -- [Extract](#extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then you run extractions for documents of the type with this SDK. -- [Classify](#classify): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. - -## Documentation - -For configuration options, see [Node SDK reference](https://docs.sensible.so/docs/sdk-node). - -## Versions - -The latest version of this SDK is v0. -The latest version of the Sensible API is v0. - -## Node and Typescript support - -This SDK supports all non end-of-lifed Node versions. -This SDK supports all non end-of-lifed Typescript versions. 
- -## Install - -In an environment in which you've installed Node, create a directory for a test project, open a command prompt in the directory, and install the dependencies: ```shell npm install sensible-api @@ -45,28 +22,28 @@ const sensible = new SensibleSDK(YOUR_API_KEY); **Note** In production ensure you secure your API key, for example as a GitHub secret. -## Extract document data - -#### Option 1: document URL +## Quickstart To extract data from a sample document at a URL: -1. Paste the following code into an empty `index.mjs` file: +1. Install the Sensible SDK using the steps in the previous section. +2. Paste the following code into an empty `index.mjs` file: ```node import { SensibleSDK } from "sensible-api" -const sensible = new SensibleSDK(YOUR_API_KEY); +const sensible = new SensibleSDK(YOUR_API_KEY); //replace with your API key const request = await sensible.extract({ url: "https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf", documentType: "sensible_instruct_basics", - environment: "development" // see Node SDK reference for full list of configuration options + environment: "development" }); -const results = await sensible.waitFor(request); // waitFor is optional if you configure a webhook -console.log(results); // see Node SDK reference to convert results from JSON to Excel +const results = await sensible.waitFor(request); // polls every 5 seconds. Optional if you configure a webhook +console.log(results); ``` -2. In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: +3. Replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/). +4.
In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: ```shell node index.mjs @@ -74,35 +51,9 @@ node index.mjs The code extracts data from an example document (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. -#### Option 2: local file - -To extract from a local file: - -1. Download the following example file and save it in the same directory as your `index.mjs` file: - -| Example document | [Download link](https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf) | -| ---------------- | ------------------------------------------------------------ | - -2. Paste the following code into an empty `index.mjs` file, then run it according to the steps in the previous option: - +#### Results -```node -import { SensibleSDK } from "sensible-api" - -const sensible = new SensibleSDK(YOUR_API_KEY); -const request = await sensible.extract({ - path: ("./contract.pdf"), - documentType: "sensible_instruct_basics", - }); -const results = await sensible.waitFor(request); // waitFor is optional if you configure a webhook -console.log(results); // see Node SDK reference to convert results from JSON to Excel -``` - -This code extracts data from a local file (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. 
- -#### Check results - -The following excerpt of the results shows the extracted document text in the `parsed_document` object: +You should see the following extracted document text in the `parsed_document` object in the logged response: ```json { @@ -119,15 +70,64 @@ The following excerpt of the results shows the extracted document text in the `p } ``` -For more information about the response body schema, see [Extract data from a document](https://docs.sensible.so/reference/extract-data-from-a-document) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. - #### Optional: Understand extraction -Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics&c=contract&g=contract to see how the extraction you just ran works in the Sensible app. You can add more fields to the extraction configuration to extract more data: +Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics&c=contract&g=contract to see how the extraction you just ran works in the Sensible app. You can add more fields to the left pane to extract more data: ![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/sdk_node_1.png) -#### Code example: Extract from PDFs in directory and convert to Excel +## Usage: Extract document data + +You can use this SDK to extract data from a document, as specified by the extraction configurations and document types defined in your Sensible account. + +### Overview + +See the following steps for an overview of the SDK's workflow for document data extraction. Every method returns a chainable promise: + +1. Instantiate an SDK object with `new SensibleSDK()`. +2. Request a document extraction with `sensible.extract()`. Use the following required parameters: + 1. **(required)** Specify the document from which to extract data using the `url`, `path`, or `file` parameter. + 2. 
**(required)** Specify the user-defined document type or types using the `documentType` or `documentTypes` parameter. +3. Wait for the result. Use `sensible.waitFor()`, or use a webhook. +4. Optionally convert extractions to Excel file with `generateExcel()`. +5. Consume the data. + +### Extraction configuration + + You can configure options for document data extraction: + + +```node +const request = await sensible.extract({ + path: ("./1040_john_doe.pdf"), + documentType: "tax_forms", + webhook: { + url:"YOUR_WEBHOOK_URL", + payload: "additional info, for example, a UUID for verification", + }}); +``` + +See the following table for information about configuration options: + +| key | value | description | +| ----------------- | ---------------------------------------------------------- | ------------------------------------------------------------ | +| path | string | An option for submitting the document you want to extract data from.
Pass the path to the document. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| file | string | An option for submitting the document you want to extract data from.
Pass the non-encoded document bytes. | +| url | string | An option for submitting the document you want to extract data from.
URL that responds to a GET request with the bytes of the document you want to extract data from. This URL must be either publicly accessible, or presigned with a security token as part of the URL path. To check if the URL meets these criteria, open the URL with a web browser. The browser must either render the document as a full-page view with no other data, or download the document, without prompting for authentication. | +| documentType | string | An option for specifying the document type or types.
Type of document to extract from. Create your custom type in the Sensible app (for example, `rate_confirmation`, `certificate_of_insurance`, or `home_inspection_report`). | +| documentTypes | array | An option for specifying the document type or types.
Types of documents to extract from. Use this parameter to extract from multiple documents that are packaged into one file (a "portfolio"). This parameter specifies the document types contained in the portfolio. Sensible then segments the portfolio into documents using the specified document types (for example, 1099, w2, and bank_statement) and then runs extractions for each document. For more information, see [Multi-doc extraction](https://docs.sensible.so/docs/portfolio). | +| configurationName | string | If specified, Sensible uses the specified config to extract data from the document instead of automatically choosing the best-scoring extraction in the document type.
If unspecified, Sensible automatically detects the best-fit extraction from among the extraction queries ("configs") in the document type.
Not applicable for portfolios. | +| documentName | string | If you specify the filename of the document using this parameter, then Sensible returns the filename in the extraction response and populates the file name in the Sensible app's list of recent extractions. | +| environment | `"production"` or `"development"`. default: `"production"` | If you specify `development`, Sensible extracts preferentially using config versions published to the development environment in the Sensible app. The extraction runs all configs in the doc type before picking the best fit. For each config, Sensible falls back to the production version if no development version of the config exists. | +| webhook | object | Specifies to return extraction results to the specified webhook URL as soon as they're complete, so you don't have to poll for results status. Sensible also calls this webhook on error.
The webhook object has the following parameters:
`url`: string. Webhook destination. Sensible will POST to this URL when the extraction is complete.
`payload`: string, number, boolean, object, or array. Information additional to the API response, for example a UUID for verification. | + +### Extraction results + +Get extraction results by using a webhook or calling the Wait For method. + +For the schema for the results of an extraction request, see [Extract data from a document](https://docs.sensible.so/reference/extract-data-from-a-document) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. + +### Example: Extract from PDFs in directory and output an Excel file See the following code for a complete example of how to use the SDK for document extraction in your own app. @@ -135,11 +135,11 @@ The example: 1. Filters a directory to find the PDF files. 2. Extracts data from the PDF files using the extraction configurations in a `bank_statements` document type. -3. Writes the extractions into an Excel file. +3. Writes the extractions to an Excel file. The Generate Excel method takes an extraction or an array of extractions, and outputs an Excel file. For more information about the conversion process, see [SenseML to spreadsheet reference](https://docs.sensible.so/docs/excel-reference). 
```node import { promises as fs } from "fs"; -import { SensibleSDK } from "sensible-sdk"; +import { SensibleSDK } from "sensible-api"; import got from "got"; const apiKey = process.env.SENSIBLE_APIKEY; const sensible = new SensibleSDK(apiKey); @@ -147,9 +147,9 @@ const dir = process.argv[2]; const files = (await fs.readdir(dir)).filter((file) => file.match(/\.pdf$/)); const extractions = await Promise.all( files.map(async (filename) => { - const file = await fs.readFile(`${dir}/${filename}`); + const path = `${dir}/${filename}`; return sensible.extract({ - file, + path, documentType: "bank_statements", }); }) @@ -157,70 +157,51 @@ const extractions = await Promise.all( await Promise.all( extractions.map((extraction) => sensible.waitFor(extraction)) ); -const excel = await sensible.generateExcel(extractions); -console.log(excel); -const excelFile = await got(excel.url); +const excel_download = await sensible.generateExcel(extractions); +console.log(excel_download); +const excelFile = await got(excel_download.url); await fs.writeFile(`${dir}/output.xlsx`, excelFile.rawBody); ``` -## Classify +## Usage: Classify documents by type + +You can use this SDK to classify a document by type, as specified by the document types defined in your Sensible account. For more information, see [Classifying documents by type](https://docs.sensible.so/docs/classify). + +### Overview + +See the following steps for an overview of the SDK's workflow for document classification. Every method returns a chainable promise: + +1. Instantiate an SDK object (`new SensibleSDK()`). -You can classify a document by its similarity to each document type you define in your Sensible account.
For example, if you define a [bank statements](https://github.com/sensible-hq/sensible-configuration-library/tree/main/bank_statements) type and a [tax_forms](https://github.com/sensible-hq/sensible-configuration-library/tree/main/tax_forms) type in your account, you can classify 1040 forms, 1099 forms, Bank of America statements, Chase statements, and other documents, into those two types. +2. Request a document classification (`sensible.classify()`). Specify the document to classify using the `path` or `file` parameter. -See the following code example for classifying a document. +3. Poll for the result (`sensible.waitFor()`). + +4. Consume the data. + + +### Classification configuration + +You can configure options for document classification: ```node import { SensibleSDK } from "sensible-api" const sensible = new SensibleSDK(YOUR_API_KEY); -const request = await sensible.classify({path:"./boa_sample.pdf"}); +const request = await sensible.classify({ + path:"./boa_sample.pdf" + }); const results = await sensible.waitFor(request); console.log(results); ``` -To classify an example document, take the following steps: - -1. Follow the steps in the preceding sections to install the SDK. -2. Paste the preceding code into your `index.mjs` file. Ensure you replaced`YOUR_API_KEY` with your [API key](https://app.sensible.so/account/). -3. Follow the steps in [Out-of-the-box extractions](https://docs.sensible.so/reference/choosing-an-endpoint/library-quickstart) to add support for bank statements to your account. -4. Download the following example file and save it in the same directory as your `index.mjs` file: - -| Example document | [Download link](https://github.com/sensible-hq/sensible-configuration-library/raw/main/bank_statements/bank_of_america/boa_sample.pdf) | -| ---------------- | ------------------------------------------------------------ | - -5. 
In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: - -```shell -node index.mjs -``` +See the following table for information about configuration options: -#### Check results +| key | value | description | +| ---- | ------ | ------------------------------------------------------------ | +| path | string | An option for submitting the document you want to extract data from. Pass the path to the document. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| file | string | Pass the non-encoded document bytes. For information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | -The following excerpt of the results shows that Sensible classifies the example document as a bank statement, and most probably as a Bank of America statement: +### Classification results -```json -{ - "document_type": { - "id": "22666f4f-b8d6-4cb5-ad52-d00996989729", - "name": "bank_statements", - "score": 0.8922476745112722 - }, - "reference_documents": [ - { - "id": "c82ac28e-7725-4e42-b77c-e74551684caa", - "name": "boa_sample", - "score": 0.9999980536061833 - }, - { - "id": "f80424a0-58f8-40e7-814a-eb49b199221e", - "name": "wells_fargo_checking_sample", - "score": 0.8946129923339182 - }, - { - "id": "cf17daf8-7e8b-4b44-bc4b-7cdd6518d963", - "name": "chase_consolidated_balance_summary_sample", - "score": 0.8677569417649393 - } - ] -} -``` +Get results from this method by calling the Wait For method. For the schema for the results of a classification request, see [Classify document by type (sync)](https://docs.sensible.so/reference/classify-document-sync) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. 
From 255b4fbead6df463380b1d3e08052f16510aa383 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:44:33 -0700 Subject: [PATCH 26/56] Update README.md From 924d2638f7861b068347efc050a7993b8ab0bcc3 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:45:02 -0700 Subject: [PATCH 27/56] Update README.md --- README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/README.md b/README.md index 0c7e3d3..28e4a41 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,33 @@ +# Sensible Node SDK + +Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. Use Sensible to build document-automation features into your SaaS products. Sensible is highly configurable: you can get simple data [in minutes](https://docs.sensible.so/docs/getting-started-ai) by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful [layout-based document primitives](https://docs.sensible.so/docs/getting-started). + +![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/intro_SDK_2.png) + +This open-source Sensible SDK offers convenient access to the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use this SDK to: + +- [Extract](#extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then run extractions for documents of the type with this SDK. +- [Classify](#classify): Classify documents by the types you define, for example, bank statements or tax forms. 
Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. + +## Documentation + +- For extraction and classification response schemas, see [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). +- For configuring document extractions, see [SenseML reference](https://docs.sensible.so/docs/senseml-reference-introduction). + +## Versions + +- The latest version of this SDK is v0. +- The latest version of the Sensible API is v0. + +## Node and Typescript support + +- This SDK supports all non-end-of-life Node versions. +- This SDK supports all non-end-of-life Typescript versions. + +## Install + +In an environment in which you've installed Node, create a directory for a test project, open a command prompt in the directory, and install the dependencies: ```shell npm install sensible-api @@ -205,3 +234,6 @@ See the following table for information about configuration options: ### Classification results Get results from this method by calling the Wait For method. For the schema for the results of a classification request, see [Classify document by type (sync)](https://docs.sensible.so/reference/classify-document-sync) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. + + + From 33d5d0d454c72c7144fa5678cc8bd47832022d48 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:54:14 -0700 Subject: [PATCH 28/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 28e4a41..d59903c 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ import { SensibleSDK } from "sensible-api"; Get an account at [sensible.so](https://app.sensible.so/register) if you don't have one already. 
-To initialize the dependency, paste the following code into your `index.mjs` file and replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/): +To initialize the dependency, paste the following code into your `index.mjs` file and replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/). ```node const sensible = new SensibleSDK(YOUR_API_KEY); From 5494a2fd0d53b2e27babdc6e610fab8b50bd4af8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Wed, 29 Nov 2023 16:17:19 -0300 Subject: [PATCH 29/56] version bump --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index d3dd01f..0df64e0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sensible-api", - "version": "0.0.5", + "version": "0.0.6", "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation","sensible-api"], "license": "MIT", From 1e8e9ec7e0575648368417e063a247e905e81b27 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 30 Nov 2023 09:32:04 -0700 Subject: [PATCH 30/56] Update README.md --- README.md | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 28e4a41..6057150 100644 --- a/README.md +++ b/README.md @@ -7,12 
+7,12 @@ Welcome! Sensible is a developer-first platform for extracting structured data f This open-source Sensible SDK offers convenient access to the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use this SDK to: -- [Extract](#extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then run extractions for documents of the type with this SDK. -- [Classify](#classify): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. +- [Extract](#usage-extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then run extractions for documents of the type with this SDK. +- [Classify](#usage-classify-documents-by-type): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. ## Documentation -- For extraction and classification response schemas, see [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). +- For extraction and classification response schemas, see the [Sensible API reference](https://docs.sensible.so/reference/choosing-an-endpoint). - For configuring document extractions, see [SenseML reference](https://docs.sensible.so/docs/senseml-reference-introduction). 
## Versions @@ -33,7 +33,7 @@ In an environment in which you've installed Node, create a directory for a test npm install sensible-api ``` -To import Sensible and other dependencies to your project, create an `index.mjs` file in your test project, and add the following lines to the file: +To import Sensible to your project, create an `index.mjs` file in your test project, and add the following lines to the file: ```node import { SensibleSDK } from "sensible-api"; @@ -43,13 +43,13 @@ import { SensibleSDK } from "sensible-api"; Get an account at [sensible.so](https://app.sensible.so/register) if you don't have one already. -To initialize the dependency, paste the following code into your `index.mjs` file and replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/): +To initialize the SDK, paste the following code into your `index.mjs` file and replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/): ```node const sensible = new SensibleSDK(YOUR_API_KEY); ``` -**Note** In production ensure you secure your API key, for example as a GitHub secret. +**Note:** Ensure you secure your API key in production, for example as a GitHub secret. ## Quickstart @@ -59,7 +59,7 @@ To extract data from a sample document at a URL: 2. Paste the following code into an empty `index.mjs` file: ```node -import { SensibleSDK } from "sensible-api" +import { SensibleSDK } from "sensible-api"; const sensible = new SensibleSDK(YOUR_API_KEY); //replace with your API key const request = await sensible.extract({ @@ -71,7 +71,7 @@ const results = await sensible.waitFor(request); // polls every 5 seconds. Optio console.log(results); ``` -2. Replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/): +2. Replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/). 3. 
In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: ```shell @@ -101,7 +101,7 @@ You should see the following extracted document text in the `parsed_document` ob #### Optional: Understand extraction -Navigate to https://app.sensible.so/editor/instruct/?d=sensible_instruct_basics&c=contract&g=contract to see how the extraction you just ran works in the Sensible app. You can add more fields to the left pane to extract more data: +Navigate to the example in the [SenseML editor](https://app.sensible.so/editor/?d=sensible_instruct_basics&c=contract&g=contract) to see how the extraction you just ran works in the Sensible app. You can add more fields to the left pane to extract more data: ![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/sdk_node_1.png) @@ -117,8 +117,8 @@ See the following steps for an overview of the SDK's workflow for document data 2. Request a document extraction with `sensible.extract()`. Use the following required parameters: 1. **(required)** Specify the document from which to extract data using the `url`, `path`, or `file` parameter. 2. **(required)** Specify the user-defined document type or types using the `documentType` or `documentTypes` parameter. -3. Wait for the result. Use `sensible.waitFor()`, or use a webhook. -4. Optionally convert extractions to Excel file with `generateExcel()`. +3. Wait for the results. Use `sensible.waitFor()`, or use a webhook. +4. Optionally convert extractions to an Excel file with `generateExcel()`. 5. Consume the data. 
### Extraction configuration @@ -130,6 +130,8 @@ See the following steps for an overview of the SDK's workflow for document data const request = await sensible.extract({ path: ("./1040_john_doe.pdf"), documentType: "tax_forms", + configurationName: "1040_2021", + "environment": "development", webhook: { url:"YOUR_WEBHOOK_URL", payload: "additional info, for example, a UUID for verification", @@ -140,12 +142,12 @@ See the following table for information about configuration options: | key | value | description | | ----------------- | ---------------------------------------------------------- | ------------------------------------------------------------ | -| path | string | An option for submitting the document you want to extract data from.
Pass the path to the document. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| path | string | An option for submitting the document you want to extract data from.
Pass the path to the document you want to extract from. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | | file | string | An option for submitting the document you want to extract data from.
Pass the non-encoded document bytes. | | url | string | An option for submitting the document you want to extract data from.
URL that responds to a GET request with the bytes of the document you want to extract data from. This URL must be either publicly accessible, or presigned with a security token as part of the URL path. To check if the URL meets these criteria, open the URL with a web browser. The browser must either render the document as a full-page view with no other data, or download the document, without prompting for authentication. | | documentType | string | An option for specifying the document type or types.
Type of document to extract from. Create your custom type in the Sensible app (for example, `rate_confirmation`, `certificate_of_insurance`, or `home_inspection_report`). | | documentTypes | array | An option for specifying the document type or types.
Types of documents to extract from. Use this parameter to extract from multiple documents that are packaged into one file (a "portfolio"). This parameter specifies the document types contained in the portfolio. Sensible then segments the portfolio into documents using the specified document types (for example, 1099, w2, and bank_statement) and then runs extractions for each document. For more information, see [Multi-doc extraction](https://docs.sensible.so/docs/portfolio). | -| configurationName | string | If specified, Sensible uses the specified config to extract data from the document instead of automatically choosing the best-scoring extraction in the document type.
If unspecified, Sensible automatically detects the best-fit extraction from among the extraction queries ("configs") in the document type.
Not applicable for portfolios. | +| configurationName | string | If specified, Sensible uses the specified config to extract data from the document instead of automatically choosing the configuration.
If unspecified, Sensible automatically chooses the best-scoring extraction from the configs in the document type.
Not applicable for portfolios. | | documentName | string | If you specify the filename of the document using this parameter, then Sensible returns the filename in the extraction response and populates the file name in the Sensible app's list of recent extractions. | | environment | `"production"` or `"development"`. default: `"production"` | If you specify `development`, Sensible extracts preferentially using config versions published to the development environment in the Sensible app. The extraction runs all configs in the doc type before picking the best fit. For each config, Sensible falls back to the production version if no development version of the config exists. | | webhook | object | Specifies to return extraction results to the specified webhook URL as soon as they're complete, so you don't have to poll for results status. Sensible also calls this webhook on error.
The webhook object has the following parameters:
`url`: string. Webhook destination. Sensible will POST to this URL when the extraction is complete.
`payload`: string, number, boolean, object, or array. Information additional to the API response, for example a UUID for verification. | From bd36fa1982161a4ca72951b1c3c2be7cabd4a366 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 30 Nov 2023 09:42:09 -0700 Subject: [PATCH 31/56] Update README.md --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7716441..d3fc7c7 100644 --- a/README.md +++ b/README.md @@ -142,13 +142,13 @@ See the following table for information about configuration options: | key | value | description | | ----------------- | ---------------------------------------------------------- | ------------------------------------------------------------ | -| path | string | An option for submitting the document you want to extract data from.
Pass the path to the document you want to extract from. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | -| file | string | An option for submitting the document you want to extract data from.
Pass the non-encoded document bytes. | -| url | string | An option for submitting the document you want to extract data from.
URL that responds to a GET request with the bytes of the document you want to extract data from. This URL must be either publicly accessible, or presigned with a security token as part of the URL path. To check if the URL meets these criteria, open the URL with a web browser. The browser must either render the document as a full-page view with no other data, or download the document, without prompting for authentication. | -| documentType | string | An option for specifying the document type or types.
Type of document to extract from. Create your custom type in the Sensible app (for example, `rate_confirmation`, `certificate_of_insurance`, or `home_inspection_report`). | -| documentTypes | array | An option for specifying the document type or types.
Types of documents to extract from. Use this parameter to extract from multiple documents that are packaged into one file (a "portfolio"). This parameter specifies the document types contained in the portfolio. Sensible then segments the portfolio into documents using the specified document types (for example, 1099, w2, and bank_statement) and then runs extractions for each document. For more information, see [Multi-doc extraction](https://docs.sensible.so/docs/portfolio). | +| path | string | The path to the document you want to extract from. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| file | string | The non-encoded bytes of the document you want to extract from. | +| url | string | The URL of the document you want to extract from. URL must:
- respond to a GET request with the bytes of the document you want to extract data from
- be either publicly accessible, or presigned with a security token as part of the URL path.
To check if the URL meets these criteria, open the URL with a web browser. The browser must either render the document as a full-page view with no other data, or download the document, without prompting for authentication. | +| documentType | string | Type of document to extract from. Create your custom type in the Sensible app (for example, `rate_confirmation`, `certificate_of_insurance`, or `home_inspection_report`), or use Sensible's library of out-of-the-box supported document types. | +| documentTypes | array | Types of documents to extract from. Use this parameter to extract from multiple documents that are packaged into one file (a "portfolio"). This parameter specifies the document types contained in the portfolio. Sensible then segments the portfolio into documents using the specified document types (for example, 1099, w2, and bank_statement) and then runs extractions for each document. For more information, see [Multi-doc extraction](https://docs.sensible.so/docs/portfolio). | | configurationName | string | If specified, Sensible uses the specified config to extract data from the document instead of automatically choosing the configuration.
If unspecified, Sensible automatically chooses the best-scoring extraction from the configs in the document type.
Not applicable for portfolios. | -| documentName | string | If you specify the filename of the document using this parameter, then Sensible returns the filename in the extraction response and populates the file name in the Sensible app's list of recent extractions. | +| documentName | string | If you specify the file name of the document using this parameter, then Sensible returns the file name in the extraction response and populates the file name in the Sensible app's list of recent extractions. | | environment | `"production"` or `"development"`. default: `"production"` | If you specify `development`, Sensible extracts preferentially using config versions published to the development environment in the Sensible app. The extraction runs all configs in the doc type before picking the best fit. For each config, Sensible falls back to the production version if no development version of the config exists. | | webhook | object | Specifies to return extraction results to the specified webhook URL as soon as they're complete, so you don't have to poll for results status. Sensible also calls this webhook on error.
The webhook object has the following parameters:
`url`: string. Webhook destination. Sensible will POST to this URL when the extraction is complete.
`payload`: string, number, boolean, object, or array. Information additional to the API response, for example a UUID for verification. | @@ -160,7 +160,7 @@ For the schema for the results of an extraction request, see [Extract data from ### Example: Extract from PDFs in directory and output an Excel file -See the following code for a complete example of how to use the SDK for document extraction in your own app. +See the following code for an example of how to use the SDK for document extraction in your own app. The example: From 00348c9ccdeaf5d914deccd0abbb904d65bce1ed Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 30 Nov 2023 09:49:58 -0700 Subject: [PATCH 32/56] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d3fc7c7..d206321 100644 --- a/README.md +++ b/README.md @@ -216,7 +216,7 @@ See the following steps for an overview of the SDK's workflow for document class You can configure options for document classification: ```node -import { SensibleSDK } from "sensible-api" +import { SensibleSDK } from "sensible-api"; const sensible = new SensibleSDK(YOUR_API_KEY); const request = await sensible.classify({ @@ -230,8 +230,8 @@ See the following table for information about configuration options: | key | value | description | | ---- | ------ | ------------------------------------------------------------ | -| path | string | An option for submitting the document you want to extract data from. Pass the path to the document. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | -| file | string | Pass the non-encoded document bytes. For information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| path | string | The path to the document you want to classify. 
For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| file | string | The non-encoded bytes of the document you want to classify. | ### Classification results From 36320aa3e640366fc0bd9bcd2eebc62a91f22445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Thu, 30 Nov 2023 13:58:50 -0300 Subject: [PATCH 33/56] version bump --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index d3dd01f..4eaec13 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sensible-api", - "version": "0.0.5", + "version": "0.0.7", "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation","sensible-api"], "license": "MIT", From ac179789b3ad8ed8bda52ea4f3af8a5e57346cda Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 30 Nov 2023 11:21:24 -0700 Subject: [PATCH 34/56] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d206321..b604eaa 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ See the following table for information about configuration options: | key | value | description | | ----------------- | 
---------------------------------------------------------- | ------------------------------------------------------------ | -| path | string | The path to the document you want to extract from. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| path | string | The path to the document you want to extract from. For more information about supported file types and size, see [Supported file types](https://docs.sensible.so/docs/file-types). | | file | string | The non-encoded bytes of the document you want to extract from. | | url | string | The URL of the document you want to extract from. URL must:
- respond to a GET request with the bytes of the document you want to extract data from
- be either publicly accessible, or presigned with a security token as part of the URL path.
To check if the URL meets these criteria, open the URL with a web browser. The browser must either render the document as a full-page view with no other data, or download the document, without prompting for authentication. | | documentType | string | Type of document to extract from. Create your custom type in the Sensible app (for example, `rate_confirmation`, `certificate_of_insurance`, or `home_inspection_report`), or use Sensible's library of out-of-the-box supported document types. | @@ -230,7 +230,7 @@ See the following table for information about configuration options: | key | value | description | | ---- | ------ | ------------------------------------------------------------ | -| path | string | The path to the document you want to classify. For more information about supported file types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| path | string | The path to the document you want to classify. For more information about supported file types and size, see [Supported file types](https://docs.sensible.so/docs/file-types). | | file | string | The non-encoded bytes of the document you want to classify. | ### Classification results From 0fa87f4a40861d517c4ef5e25ff7709e128ec205 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 30 Nov 2023 11:23:23 -0700 Subject: [PATCH 35/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b604eaa..4587284 100644 --- a/README.md +++ b/README.md @@ -230,7 +230,7 @@ See the following table for information about configuration options: | key | value | description | | ---- | ------ | ------------------------------------------------------------ | -| path | string | The path to the document you want to classify. For more information about supported file types and size, see [Supported file types](https://docs.sensible.so/docs/file-types). 
| +| path | string | The path to the document you want to classify. For information about supported file size and types, see [Supported file types](https://docs.sensible.so/docs/file-types). | | file | string | The non-encoded bytes of the document you want to classify. | ### Classification results From 4876b00c0bc5864869a066c6475e8a3467800d9c Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Thu, 30 Nov 2023 11:24:29 -0700 Subject: [PATCH 36/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4587284..f818e9f 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ See the following table for information about configuration options: | key | value | description | | ----------------- | ---------------------------------------------------------- | ------------------------------------------------------------ | -| path | string | The path to the document you want to extract from. For more information about supported file types and size, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| path | string | The path to the document you want to extract from. For more information about supported file size and types, see [Supported file types](https://docs.sensible.so/docs/file-types). | | file | string | The non-encoded bytes of the document you want to extract from. | | url | string | The URL of the document you want to extract from. URL must:
- respond to a GET request with the bytes of the document you want to extract data from
- be either publicly accessible, or presigned with a security token as part of the URL path.
To check if the URL meets these criteria, open the URL with a web browser. The browser must either render the document as a full-page view with no other data, or download the document, without prompting for authentication. | | documentType | string | Type of document to extract from. Create your custom type in the Sensible app (for example, `rate_confirmation`, `certificate_of_insurance`, or `home_inspection_report`), or use Sensible's library of out-of-the-box supported document types. | From 228bb97da3c2730d196fe27764595e507b668034 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 4 Dec 2023 10:00:01 -0700 Subject: [PATCH 37/56] style updates --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f818e9f..08906de 100644 --- a/README.md +++ b/README.md @@ -147,20 +147,20 @@ See the following table for information about configuration options: | url | string | The URL of the document you want to extract from. URL must:
- respond to a GET request with the bytes of the document you want to extract data from
- be either publicly accessible, or presigned with a security token as part of the URL path.
To check if the URL meets these criteria, open the URL with a web browser. The browser must either render the document as a full-page view with no other data, or download the document, without prompting for authentication. | | documentType | string | Type of document to extract from. Create your custom type in the Sensible app (for example, `rate_confirmation`, `certificate_of_insurance`, or `home_inspection_report`), or use Sensible's library of out-of-the-box supported document types. | | documentTypes | array | Types of documents to extract from. Use this parameter to extract from multiple documents that are packaged into one file (a "portfolio"). This parameter specifies the document types contained in the portfolio. Sensible then segments the portfolio into documents using the specified document types (for example, 1099, w2, and bank_statement) and then runs extractions for each document. For more information, see [Multi-doc extraction](https://docs.sensible.so/docs/portfolio). | -| configurationName | string | If specified, Sensible uses the specified config to extract data from the document instead of automatically choosing the configuration.
If unspecified, Sensible automatically chooses the best-scoring extraction from the configs in the document type.
Not applicable for portfolios. | +| configurationName | string | Sensible uses the specified config to extract data from the document instead of automatically choosing the configuration.
If unspecified, Sensible chooses the best-scoring extraction from the configs in the document type.
Not applicable for portfolios. | | documentName | string | If you specify the file name of the document using this parameter, then Sensible returns the file name in the extraction response and populates the file name in the Sensible app's list of recent extractions. | | environment | `"production"` or `"development"`. default: `"production"` | If you specify `development`, Sensible extracts preferentially using config versions published to the development environment in the Sensible app. The extraction runs all configs in the doc type before picking the best fit. For each config, falls back to production version if no development version of the config exists. | -| webhook | object | Specifies to return extraction results to the specified webhook URL as soon as they're complete, so you don't have to poll for results status. Sensible also calls this webhook on error.
The webhook object has the following parameters:
`url`: string. Webhook destination. Sensible will POST to this URL when the extraction is complete.
`payload`: string, number, boolean, object, or array. Information additional to the API response, for example a UUID for verification. | +| webhook | object | Specifies to return extraction results to the specified webhook URL as soon as they're complete, so you don't have to poll for results status. Sensible also calls this webhook on error.
The webhook object has the following parameters:
`url`: string. Webhook destination. Sensible posts to this URL when the extraction is complete.
`payload`: string, number, boolean, object, or array. Information additional to the API response, for example a UUID for verification. | ### Extraction results Get extraction results by using a webhook or calling the Wait For method. -For the schema for the results of an extraction request, see [Extract data from a document](https://docs.sensible.so/reference/extract-data-from-a-document) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. +For the extraction results schema, see [Extract data from a document](https://docs.sensible.so/reference/extract-data-from-a-document) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. ### Example: Extract from PDFs in directory and output an Excel file -See the following code for an example of how to use the SDK for document extraction in your own app. +See the following code for an example of how to use the SDK for document extraction in your app. The example: @@ -235,4 +235,4 @@ See the following table for information about configuration options: ### Classification results -Get results from this method by calling the Wait For method. For the schema for the results of a classification request , see [Classify document by type (sync)](https://docs.sensible.so/reference/classify-document-sync) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. +Get results from this method by calling the Wait For method. For the classification results schema, see [Classify document by type (sync)](https://docs.sensible.so/reference/classify-document-sync) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. 
From 69eac1bd8a6ea3c94a05146aa093c69946f90795 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 4 Dec 2023 12:15:52 -0700 Subject: [PATCH 38/56] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 08906de..05af889 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,7 @@ The example: 1. Filters a directory to find the PDF files. 2. Extracts data from the PDF files using the extraction configurations in a `bank_statements` document type. +4. Logs the extracted document data JSON to the console. 3. Writes the extractions to an Excel file. The Generate Excel method takes an extraction or an array of extractions, and outputs an Excel file. For more information about the conversion process, see [SenseML to spreadsheet reference](https://docs.sensible.so/docs/excel-reference). ```node @@ -174,7 +175,7 @@ import { SensibleSDK } from "sensible-api"; import got from "got"; const apiKey = process.env.SENSIBLE_APIKEY; const sensible = new SensibleSDK(apiKey); -const dir = process.argv[2]; +const dir = "PATH_TO_DOCUMENTS_DIR"; const files = (await fs.readdir(dir)).filter((file) => file.match(/\.pdf$/)); const extractions = await Promise.all( files.map(async (filename) => { @@ -185,10 +186,14 @@ const extractions = await Promise.all( }); }) ); -await Promise.all( +const results = await Promise.all( extractions.map((extraction) => sensible.waitFor(extraction)) ); + +console.log(extractions); +console.log(results); const excel_download = await sensible.generateExcel(extractions); +console.log("Excel download URL:"); console.log(excel_download); const excelFile = await got(excel_download.url); await fs.writeFile(`${dir}/output.xlsx`, excelFile.rawBody); From 274b5a05dd0c317a608b645af4762b99db7efe5e Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:07:27 -0700 Subject: [PATCH 39/56] 
Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 05af889..bf349ce 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,7 @@ import { SensibleSDK } from "sensible-api"; import got from "got"; const apiKey = process.env.SENSIBLE_APIKEY; const sensible = new SensibleSDK(apiKey); -const dir = "PATH_TO_DOCUMENTS_DIR"; +const dir = "ABSOLUTE_PATH_TO_DOCUMENTS_DIR"; const files = (await fs.readdir(dir)).filter((file) => file.match(/\.pdf$/)); const extractions = await Promise.all( files.map(async (filename) => { @@ -192,10 +192,10 @@ const results = await Promise.all( console.log(extractions); console.log(results); -const excel_download = await sensible.generateExcel(extractions); +const excel = await sensible.generateExcel(extractions); console.log("Excel download URL:"); -console.log(excel_download); -const excelFile = await got(excel_download.url); +console.log(excel); +const excelFile = await got(excel.url); await fs.writeFile(`${dir}/output.xlsx`, excelFile.rawBody); ``` @@ -223,7 +223,7 @@ You can configure options for document data extraction: ```node import { SensibleSDK } from "sensible-api"; -const sensible = new SensibleSDK(YOUR_API_KEY); +const sensible = new SensibleSDK("YOUR_API_KEY"); const request = await sensible.classify({ path:"./boa_sample.pdf" }); From 1697e206e09b98bf4b9fed4214934c5521b32e85 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 11 Dec 2023 10:40:29 -0700 Subject: [PATCH 40/56] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bf349ce..4ac706b 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,8 @@ const request = await sensible.extract({ path: ("./1040_john_doe.pdf"), documentType: "tax_forms", configurationName: "1040_2021", - "environment": "development", + environment: "development", + documentName: "1040_john_doe.pdf", 
webhook: { url:"YOUR_WEBHOOK_URL", payload: "additional info, for example, a UUID for verification", From 771aa98dbbaf5d009a40ed1f07d22eaadcc62c48 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:00:00 -0700 Subject: [PATCH 41/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ac706b..2cc43d3 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ This open-source Sensible SDK offers convenient access to the [Sensible API](htt ## Install -In an environment in which you've installed Node, create a directory for a test project, open a command prompt in the directory, and install the dependencies: +In an environment with Node installed, create a directory for a test project, open a command prompt in the directory, and install the dependencies: ```shell npm install sensible-api From 7bed45c8f0bc7ce58bccd7ebab4fb6df3e2103a5 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:31:10 -0700 Subject: [PATCH 42/56] Update README.md --- README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2cc43d3..f6d8bee 100644 --- a/README.md +++ b/README.md @@ -27,16 +27,19 @@ This open-source Sensible SDK offers convenient access to the [Sensible API](htt ## Install -In an environment with Node installed, create a directory for a test project, open a command prompt in the directory, and install the dependencies: + +In an environment with Node installed, open a command prompt and enter the following commands to create a test project: ```shell -npm install sensible-api +mkdir sensible-test +cd sensible-test +touch index.mjs ``` -To import Sensible to your project, create an `index.mjs` file in your test project, and add the following lines to the file: +Then install the SDK: -```node -import { SensibleSDK } from 
"sensible-api"; +```shell +npm install sensible-api ``` ## Initialize From 2a8840cc5dc300f00ee06983fcccb4746bd68f09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Thu, 14 Dec 2023 13:56:36 -0300 Subject: [PATCH 43/56] fix documentName case error --- package.json | 2 +- src/index.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 4eaec13..ca8ba51 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sensible-api", - "version": "0.0.7", + "version": "0.0.8", "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation","sensible-api"], "license": "MIT", diff --git a/src/index.ts b/src/index.ts index af75562..026d45a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -28,7 +28,7 @@ export class SensibleSDK { : "") + `?${querystring.stringify({ ...(environment ? { environment } : {}), - ...(documentName ? { documentName } : {}), + ...(documentName ? 
{ document_name: documentName } : {}), })}`; const body = { From af754bf2d727bb6a93097582fcbc47a729658077 Mon Sep 17 00:00:00 2001 From: Frances Elliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:16:11 -0700 Subject: [PATCH 44/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f6d8bee..036c571 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Sensible Node SDK -Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your SaaS products. Sensible is highly configurable: you can get simple data [in minutes](https://docs.sensible.so/docs/getting-started-ai) by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful [layout-based document primitives](https://docs.sensible.so/docs/getting-started). +Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. Use Sensible to build document-automation features into your SaaS products. Sensible is highly configurable: you can get simple data [in minutes](https://docs.sensible.so/docs/getting-started-ai) by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful [layout-based document primitives](https://docs.sensible.so/docs/getting-started). 
![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/intro_SDK_2.png) From 652bec40e156d6e59f29ccc4f366e4ed4e428e92 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:17:45 -0600 Subject: [PATCH 45/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 036c571..9955b8f 100644 --- a/README.md +++ b/README.md @@ -227,7 +227,7 @@ You can configure options for document data extraction: ```node import { SensibleSDK } from "sensible-api"; -const sensible = new SensibleSDK("YOUR_API_KEY"); +const sensible = new SensibleSDK(YOUR_API_KEY); const request = await sensible.classify({ path:"./boa_sample.pdf" }); From a5a77f083bc285ca558bff2b4621e211aa63fc57 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:26:13 -0600 Subject: [PATCH 46/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9955b8f..5ecad49 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ The example: import { promises as fs } from "fs"; import { SensibleSDK } from "sensible-api"; import got from "got"; -const apiKey = process.env.SENSIBLE_APIKEY; +const apiKey = process.env.SENSIBLE_API_KEY; const sensible = new SensibleSDK(apiKey); const dir = "ABSOLUTE_PATH_TO_DOCUMENTS_DIR"; const files = (await fs.readdir(dir)).filter((file) => file.match(/\.pdf$/)); From 25a9a5e69e44061d1d4421d230ccb005acbb223e Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:29:48 -0600 Subject: [PATCH 47/56] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5ecad49..37d1f94 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ npm install 
sensible-api Get an account at [sensible.so](https://app.sensible.so/register) if you don't have one already. -To initialize the SDK, paste the following code into your `index.mjs` file and replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/): +To initialize the SDK, paste the following code into your `index.mjs` file and replace `*YOUR_API_KEY*` with your [API key](https://app.sensible.so/account/): ```node const sensible = new SensibleSDK(YOUR_API_KEY); @@ -74,7 +74,7 @@ const results = await sensible.waitFor(request); // polls every 5 seconds. Optio console.log(results); ``` -2. Replace `YOUR_API_KEY` with your [API key](https://app.sensible.so/account/). +2. replace `*YOUR_API_KEY*` with your [API key](https://app.sensible.so/account/). 3. In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: ```shell From 2a961378f43442d3a12076169f282a45b7df7480 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:36:02 -0600 Subject: [PATCH 48/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 37d1f94..437b39f 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ const results = await sensible.waitFor(request); // polls every 5 seconds. Optio console.log(results); ``` -2. replace `*YOUR_API_KEY*` with your [API key](https://app.sensible.so/account/). +2. Replace `*YOUR_API_KEY*` with your [API key](https://app.sensible.so/account/). 3. 
In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: ```shell From 9e2e1c46c14ba73cc924d385fe715a01d20a70d1 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:40:23 -0600 Subject: [PATCH 49/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 437b39f..026d2d6 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ import { SensibleSDK } from "sensible-api"; import got from "got"; const apiKey = process.env.SENSIBLE_API_KEY; const sensible = new SensibleSDK(apiKey); -const dir = "ABSOLUTE_PATH_TO_DOCUMENTS_DIR"; +const dir = ABSOLUTE_PATH_TO_DOCUMENTS_DIR; const files = (await fs.readdir(dir)).filter((file) => file.match(/\.pdf$/)); const extractions = await Promise.all( files.map(async (filename) => { From 4d615b5332a78dd4d0bcfc6aa25f25b695da0034 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Fri, 22 Mar 2024 15:36:16 -0600 Subject: [PATCH 50/56] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 026d2d6..bf04965 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,8 @@ To extract data from a sample document at a URL: ```node import { SensibleSDK } from "sensible-api"; -const sensible = new SensibleSDK(YOUR_API_KEY); //replace with your API key +// if you paste in your key, like `SensibleSDK("1ac34b14")` then secure it in production +const sensible = new SensibleSDK(YOUR_API_KEY); const request = await sensible.extract({ url: "https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf", documentType: "sensible_instruct_basics", From 9c6e8fff3f48bcb6f5a6091e60950737dcba2998 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Fri, 22 Mar 2024 19:24:08 -0300 Subject: [PATCH 51/56] build js for 
2a8840cc5dc300f00ee06983fcccb4746bd68f09 --- dist/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/index.js b/dist/index.js index 4752d14..6f6a644 100644 --- a/dist/index.js +++ b/dist/index.js @@ -72,7 +72,7 @@ var SensibleSDK = /** @class */ (function () { ? "/".concat(params.documentType) + ("configuration" in params ? "/".concat(params.configurationName) : "") : "") + - "?".concat(querystring.stringify(__assign(__assign({}, (environment ? { environment: environment } : {})), (documentName ? { documentName: documentName } : {})))); + "?".concat(querystring.stringify(__assign(__assign({}, (environment ? { environment: environment } : {})), (documentName ? { document_name: documentName } : {})))); body = __assign(__assign(__assign({}, ("url" in params ? { document_url: params.url } : {})), (webhook ? { webhook: webhook } : {})), ("documentTypes" in params ? { types: params.documentTypes } : {})); headers = { authorization: "Bearer ".concat(this.apiKey) }; _b.label = 1; From 1652ba2e0733f4432cf551ac2e93eca76e311873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Fri, 22 Mar 2024 19:24:22 -0300 Subject: [PATCH 52/56] version bump --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ca8ba51..566814a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sensible-api", - "version": "0.0.8", + "version": "0.0.9", "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," 
intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation","sensible-api"], "license": "MIT", From 05039a9147a74f32f612b720dfa896e99d4e11e2 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 25 Mar 2024 11:18:43 -0600 Subject: [PATCH 53/56] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bf04965..de22dd4 100644 --- a/README.md +++ b/README.md @@ -49,10 +49,11 @@ Get an account at [sensible.so](https://app.sensible.so/register) if you don't h To initialize the SDK, paste the following code into your `index.mjs` file and replace `*YOUR_API_KEY*` with your [API key](https://app.sensible.so/account/): ```node +// if you paste in your key, like `SensibleSDK("1ac34b14")` then secure it in production const sensible = new SensibleSDK(YOUR_API_KEY); ``` -**Note:** Ensure you secure your API key in production, for example as a GitHub secret. +**Note:** Secure your API key in production, for example as a GitHub secret. 
## Quickstart From ea5c97399319bb200c797a4c8badbde354fcda2a Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Mon, 25 Mar 2024 11:35:32 -0600 Subject: [PATCH 54/56] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index de22dd4..edfa846 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,7 @@ You can configure options for document data extraction: ```node import { SensibleSDK } from "sensible-api"; +// if you paste in your key, like `SensibleSDK("1ac34b14")` then secure it in production const sensible = new SensibleSDK(YOUR_API_KEY); const request = await sensible.classify({ path:"./boa_sample.pdf" From 4d98a3d3013fc0f2ff260db2f732dca040a862e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horacio=20Pe=C3=B1a?= Date: Mon, 15 Apr 2024 17:12:03 -0300 Subject: [PATCH 55/56] fix extraction status handling --- package.json | 2 +- src/index.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 566814a..af435a3 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sensible-api", - "version": "0.0.9", + "version": "0.0.10", "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", "keywords": ["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation","sensible-api"], "license": "MIT", diff --git a/src/index.ts b/src/index.ts index 
026d45a..5cfba7d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -129,7 +129,7 @@ export class SensibleSDK { response && typeof response === "object" && "status" in response && - response.status !== "WAITING" + (response.status == "COMPLETE" || response.status == "FAILED") ) { return response as ExtractionResult; } From f1e2bc1de71595761947884705f2fe07e5e2a971 Mon Sep 17 00:00:00 2001 From: fscelliott <42477011+fscelliott@users.noreply.github.com> Date: Fri, 4 Oct 2024 13:58:13 -0600 Subject: [PATCH 56/56] template library reorg --- .gitignore | 12 +++++++++++- README.md | 4 ++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 3c3629e..2a6399f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,11 @@ -node_modules +.envrc +logs.csv +test.sh +test.yml +docs.json +spec.json +*.sublime-project +*.sublime-workspace +readme-sync/f_project.sublime-workspace +*.sublime-workspace +readme-sync/f_project.sublime-workspace diff --git a/README.md b/README.md index edfa846..1f511cf 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Welcome! Sensible is a developer-first platform for extracting structured data f This open-source Sensible SDK offers convenient access to the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use this SDK to: - [Extract](#usage-extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then run extractions for documents of the type with this SDK. -- [Classify](#usage-classify-documents-by-type): Classify documents by the types you define, for example, bank statements or tax forms. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. 
+- [Classify](#usage-classify-documents-by-type): Classify documents by the types you define, for example, bank statements or tax documents. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. ## Documentation @@ -134,7 +134,7 @@ See the following steps for an overview of the SDK's workflow for document data ```node const request = await sensible.extract({ path: ("./1040_john_doe.pdf"), - documentType: "tax_forms", + documentType: "1040s", configurationName: "1040_2021", environment: "development", documentName="1040_john_doe.pdf",