diff --git a/.gitignore b/.gitignore index 3c3629e..2a6399f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,11 @@ -node_modules +.envrc +logs.csv +test.sh +test.yml +docs.json +spec.json +*.sublime-project +*.sublime-workspace +readme-sync/f_project.sublime-workspace +*.sublime-workspace +readme-sync/f_project.sublime-workspace diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2003910 --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +Copyright 2023 Sensible Technologies, Inc + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index a611ba4..1f511cf 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,250 @@ -# sensible-sdk-js -Sensible SDK for Javascript/Typescript -Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. use Sensible to build document-automation features into your vertical SaaS products. 
Sensible is highly configurable: you can get simple data in minutes by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful document primitives. For more information, see [Javascript SDK quickstart](https://docs.sensible.so/docs/quickstart-javascript). +# Sensible Node SDK + +Welcome! Sensible is a developer-first platform for extracting structured data from documents, for example, business forms in PDF format. Use Sensible to build document-automation features into your SaaS products. Sensible is highly configurable: you can get simple data [in minutes](https://docs.sensible.so/docs/getting-started-ai) by leveraging GPT-4 and other large-language models (LLMs), or you can tackle complex and idiosyncratic document formatting with Sensible's powerful [layout-based document primitives](https://docs.sensible.so/docs/getting-started). ![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/intro_SDK_2.png) + +This open-source Sensible SDK offers convenient access to the [Sensible API](https://docs.sensible.so/reference/choosing-an-endpoint). Use this SDK to: + +- [Extract](#usage-extract-document-data): Extract structured data from your custom documents. Configure the extractions for a set of similar documents, or *document type*, in the Sensible app or Sensible API, then run extractions for documents of the type with this SDK. +- [Classify](#usage-classify-documents-by-type): Classify documents by the types you define, for example, bank statements or tax documents. Use classification to determine which documents to extract prior to calling a Sensible extraction endpoint, or route each document in a system of record. + +## Documentation + +- For extraction and classification response schemas, see the [Sensible API reference](https://docs.sensible.so/reference/choosing-an-endpoint). 
+- For configuring document extractions, see [SenseML reference](https://docs.sensible.so/docs/senseml-reference-introduction). + +## Versions + +- The latest version of this SDK is v0. +- The latest version of the Sensible API is v0. + +## Node and Typescript support + +- This SDK supports all non-end-of-life Node versions. +- This SDK supports all non-end-of-life Typescript versions. + +## Install + + +In an environment with Node installed, open a command prompt and enter the following commands to create a test project: + +```shell +mkdir sensible-test +cd sensible-test +touch index.mjs +``` + +Then install the SDK: + +```shell +npm install sensible-api +``` + +## Initialize + +Get an account at [sensible.so](https://app.sensible.so/register) if you don't have one already. + +To initialize the SDK, paste the following code into your `index.mjs` file and replace `*YOUR_API_KEY*` with your [API key](https://app.sensible.so/account/): + +```node +// if you paste in your key, like `SensibleSDK("1ac34b14")` then secure it in production +const sensible = new SensibleSDK(YOUR_API_KEY); +``` + +**Note:** Secure your API key in production, for example as a GitHub secret. + +## Quickstart + +To extract data from a sample document at a URL: + +1. Install the Sensible SDK using the steps in the previous section. +2. Paste the following code into an empty `index.mjs` file: + +```node +import { SensibleSDK } from "sensible-api"; + +// if you paste in your key, like `SensibleSDK("1ac34b14")` then secure it in production +const sensible = new SensibleSDK(YOUR_API_KEY); +const request = await sensible.extract({ + url: "https://github.com/sensible-hq/sensible-docs/raw/main/readme-sync/assets/v0/pdfs/contract.pdf", + documentType: "sensible_instruct_basics", + environment: "development" + }); +const results = await sensible.waitFor(request); // polls every 5 seconds. Optional if you configure a webhook +console.log(results); +``` + +2. 
Replace `*YOUR_API_KEY*` with your [API key](https://app.sensible.so/account/). +3. In a command prompt in the same directory as your `index.mjs` file, run the code with the following command: + +```shell +node index.mjs +``` + +The code extracts data from an example document (`contract.pdf`) using an example document type (`sensible_instruct_basics`) and an example extraction configuration. + +#### Results + +You should see the following extracted document text in the `parsed_document` object in the logged response: + +```json +{ + "purchase_price": { + "source": "$400,000", + "value": 400000, + "unit": "$", + "type": "currency" + }, + "street_address": { + "value": "1234 ABC COURT City of SALT LAKE CITY County of Salt Lake -\nState of Utah, Zip 84108", + "type": "address" + } +} +``` + +#### Optional: Understand extraction + +Navigate to the example in the [SenseML editor](https://app.sensible.so/editor/?d=sensible_instruct_basics&c=contract&g=contract) to see how the extraction you just ran works in the Sensible app. You can add more fields to the left pane to extract more data: + +![Click to enlarge](https://raw.githubusercontent.com/sensible-hq/sensible-docs/main/readme-sync/assets/v0/images/final/sdk_node_1.png) + +## Usage: Extract document data + +You can use this SDK to extract data from a document, as specified by the extraction configurations and document types defined in your Sensible account. + +### Overview + +See the following steps for an overview of the SDK's workflow for document data extraction. Every method returns a chainable promise: + +1. Instantiate an SDK object with `new SensibleSDK()`. +2. Request a document extraction with `sensible.extract()`. Use the following required parameters: + 1. **(required)** Specify the document from which to extract data using the `url`, `path`, or `file` parameter. + 2. **(required)** Specify the user-defined document type or types using the `documentType` or `documentTypes` parameter. +3. 
Wait for the results. Use `sensible.waitFor()`, or use a webhook. +4. Optionally convert extractions to an Excel file with `generateExcel()`. +5. Consume the data. + +### Extraction configuration + + You can configure options for document data extraction: + + +```node +const request = await sensible.extract({ + path: "./1040_john_doe.pdf", + documentType: "1040s", + configurationName: "1040_2021", + environment: "development", + documentName: "1040_john_doe.pdf", + webhook: { + url:"YOUR_WEBHOOK_URL", + payload: "additional info, for example, a UUID for verification", + }}); +``` + +See the following table for information about configuration options: + +| key | value | description | +| ----------------- | ---------------------------------------------------------- | ------------------------------------------------------------ | +| path | string | The path to the document you want to extract from. For more information about supported file size and types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| file | Buffer | The non-encoded bytes of the document you want to extract from. | +| url | string | The URL of the document you want to extract from. URL must:
- respond to a GET request with the bytes of the document you want to extract data from
- be either publicly accessible, or presigned with a security token as part of the URL path.
To check if the URL meets these criteria, open the URL with a web browser. The browser must either render the document as a full-page view with no other data, or download the document, without prompting for authentication. | +| documentType | string | Type of document to extract from. Create your custom type in the Sensible app (for example, `rate_confirmation`, `certificate_of_insurance`, or `home_inspection_report`), or use Sensible's library of out-of-the-box supported document types. | +| documentTypes | array | Types of documents to extract from. Use this parameter to extract from multiple documents that are packaged into one file (a "portfolio"). This parameter specifies the document types contained in the portfolio. Sensible then segments the portfolio into documents using the specified document types (for example, 1099, w2, and bank_statement) and then runs extractions for each document. For more information, see [Multi-doc extraction](https://docs.sensible.so/docs/portfolio). | +| configurationName | string | Sensible uses the specified config to extract data from the document instead of automatically choosing the configuration.
If unspecified, Sensible chooses the best-scoring extraction from the configs in the document type.
Not applicable for portfolios. | +| documentName | string | If you specify the file name of the document using this parameter, then Sensible returns the file name in the extraction response and populates the file name in the Sensible app's list of recent extractions. | +| environment | `"production"` or `"development"`. default: `"production"` | If you specify `development`, Sensible extracts preferentially using config versions published to the development environment in the Sensible app. The extraction runs all configs in the doc type before picking the best fit. For each config, falls back to production version if no development version of the config exists. | +| webhook | object | Specifies to return extraction results to the specified webhook URL as soon as they're complete, so you don't have to poll for results status. Sensible also calls this webhook on error.
The webhook object has the following parameters:
`url`: string. Webhook destination. Sensible posts to this URL when the extraction is complete.
`payload`: string, number, boolean, object, or array. Information additional to the API response, for example a UUID for verification. | + +### Extraction results + +Get extraction results by using a webhook or calling the Wait For method. + +For the extraction results schema, see [Extract data from a document](https://docs.sensible.so/reference/extract-data-from-a-document) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. + +### Example: Extract from PDFs in directory and output an Excel file + +See the following code for an example of how to use the SDK for document extraction in your app. + +The example: + +1. Filters a directory to find the PDF files. +2. Extracts data from the PDF files using the extraction configurations in a `bank_statements` document type. +3. Logs the extracted document data JSON to the console. +4. Writes the extractions to an Excel file. The Generate Excel method takes an extraction or an array of extractions, and outputs an Excel file. For more information about the conversion process, see [SenseML to spreadsheet reference](https://docs.sensible.so/docs/excel-reference). 
+ +```node +import { promises as fs } from "fs"; +import { SensibleSDK } from "sensible-api"; +import got from "got"; +const apiKey = process.env.SENSIBLE_API_KEY; +const sensible = new SensibleSDK(apiKey); +const dir = ABSOLUTE_PATH_TO_DOCUMENTS_DIR; +const files = (await fs.readdir(dir)).filter((file) => file.match(/\.pdf$/)); +const extractions = await Promise.all( + files.map(async (filename) => { + const path = `${dir}/${filename}`; + return sensible.extract({ + path, + documentType: "bank_statements", + }); + }) +); +const results = await Promise.all( + extractions.map((extraction) => sensible.waitFor(extraction)) +); + +console.log(extractions); +console.log(results); +const excel = await sensible.generateExcel(extractions); +console.log("Excel download URL:"); +console.log(excel); +const excelFile = await got(excel.url); +await fs.writeFile(`${dir}/output.xlsx`, excelFile.rawBody); +``` + +## Usage: Classify documents by type + +You can use this SDK to classify a document by type, as specified by the document types defined in your Sensible account. For more information, see [Classifying documents by type](https://docs.sensible.so/docs/classify). + +### Overview + +See the following steps for an overview of the SDK's workflow for document classification. Every method returns a chainable promise: + +1. Instantiate an SDK object (`new SensibleSDK()`). + +2. Request a document classification (`sensible.classify()`). Specify the document to classify using the `path` or `file` parameter. + +3. Poll for the result (`sensible.waitFor()`). + +4. Consume the data. 
+ + +### Classification configuration + +You can configure options for document classification: + +```node +import { SensibleSDK } from "sensible-api"; + +// if you paste in your key, like `SensibleSDK("1ac34b14")` then secure it in production +const sensible = new SensibleSDK(YOUR_API_KEY); +const request = await sensible.classify({ + path:"./boa_sample.pdf" + }); +const results = await sensible.waitFor(request); +console.log(results); +``` + +See the following table for information about configuration options: + +| key | value | description | +| ---- | ------ | ------------------------------------------------------------ | +| path | string | The path to the document you want to classify. For information about supported file size and types, see [Supported file types](https://docs.sensible.so/docs/file-types). | +| file | Buffer | The non-encoded bytes of the document you want to classify. | + +### Classification results + +Get results from this method by calling the Wait For method. For the classification results schema, see [Classify document by type (sync)](https://docs.sensible.so/reference/classify-document-sync) and expand the 200 responses in the middle pane and the right pane to see the model and an example, respectively. 
diff --git a/dist/index.d.ts b/dist/index.d.ts index dc0469e..0d13016 100644 --- a/dist/index.d.ts +++ b/dist/index.d.ts @@ -1,5 +1,5 @@ /// -import { ClassificationResult, ExtractionResult, Webhook } from "./types"; +import type { ClassificationResult, ExtractionResult, Webhook } from "./types"; export declare class SensibleSDK { apiKey: string; constructor(apiKey: string); @@ -14,6 +14,8 @@ type FileDefinition = { file: Buffer; } | { url: string; +} | { + path: string; }; type DocumentType = { documentType: string; @@ -29,6 +31,8 @@ type Options = { type ExtractParams = FileDefinition & DocumentType & Options; type ClassificationParams = { file: Buffer; +} | { + path: string; }; type ExtractionRequest = { type: "extraction"; @@ -39,4 +43,4 @@ type ClassificationRequest = { id: string; downloadLink: string; }; -export {}; +export type { ClassificationResult, ExtractionResult, Webhook }; diff --git a/dist/index.js b/dist/index.js index 354d9cf..6f6a644 100644 --- a/dist/index.js +++ b/dist/index.js @@ -50,6 +50,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.SensibleSDK = void 0; var got_1 = require("got"); var querystring = require("node:querystring"); +var fs_1 = require("fs"); var util_1 = require("util"); var baseUrl = "https://api.sensible.so/v0"; var SensibleSDK = /** @class */ (function () { @@ -58,9 +59,9 @@ var SensibleSDK = /** @class */ (function () { } SensibleSDK.prototype.extract = function (params) { return __awaiter(this, void 0, void 0, function () { - var webhook, documentName, environment, url, body, headers, response, e_1, id, upload_url, putResponse, e_2; - return __generator(this, function (_a) { - switch (_a.label) { + var webhook, documentName, environment, url, body, headers, response, e_1, id, upload_url, file, _a, putResponse, e_2; + return __generator(this, function (_b) { + switch (_b.label) { case 0: // This can be called from JS, so we cannot trust the type engine validateExtractParams(params); @@ -71,12 
+72,12 @@ var SensibleSDK = /** @class */ (function () { ? "/".concat(params.documentType) + ("configuration" in params ? "/".concat(params.configurationName) : "") : "") + - "?".concat(querystring.stringify(__assign(__assign({}, (environment ? { environment: environment } : {})), (documentName ? { documentName: documentName } : {})))); + "?".concat(querystring.stringify(__assign(__assign({}, (environment ? { environment: environment } : {})), (documentName ? { document_name: documentName } : {})))); body = __assign(__assign(__assign({}, ("url" in params ? { document_url: params.url } : {})), (webhook ? { webhook: webhook } : {})), ("documentTypes" in params ? { types: params.documentTypes } : {})); headers = { authorization: "Bearer ".concat(this.apiKey) }; - _a.label = 1; + _b.label = 1; case 1: - _a.trys.push([1, 3, , 4]); + _b.trys.push([1, 3, , 4]); return [4 /*yield*/, got_1.default .post(url, { json: body, @@ -84,10 +85,10 @@ var SensibleSDK = /** @class */ (function () { }) .json()]; case 2: - response = _a.sent(); + response = _b.sent(); return [3 /*break*/, 4]; case 3: - e_1 = _a.sent(); + e_1 = _b.sent(); throwError(e_1); return [3 /*break*/, 4]; case 4: @@ -101,52 +102,70 @@ var SensibleSDK = /** @class */ (function () { throw "Got invalid response from extract_from_url: ".concat(JSON.stringify(response)); } id = response.id, upload_url = response.upload_url; - _a.label = 6; - case 6: - _a.trys.push([6, 8, , 9]); + if (!("file" in params)) return [3 /*break*/, 6]; + _a = params.file; + return [3 /*break*/, 8]; + case 6: return [4 /*yield*/, fs_1.promises.readFile(params.path)]; + case 7: + _a = _b.sent(); + _b.label = 8; + case 8: + file = _a; + _b.label = 9; + case 9: + _b.trys.push([9, 11, , 12]); return [4 /*yield*/, got_1.default.put(upload_url, { method: "PUT", - body: params.file, + body: file, })]; - case 7: - putResponse = _a.sent(); - return [3 /*break*/, 9]; - case 8: - e_2 = _a.sent(); + case 10: + putResponse = _b.sent(); + return [3 
/*break*/, 12]; + case 11: + e_2 = _b.sent(); throw "Error ".concat(e_2.response.statusCode, " uploading file to S3: ").concat(e_2.response.body); - case 9: return [2 /*return*/, { type: "extraction", id: id }]; + case 12: return [2 /*return*/, { type: "extraction", id: id }]; } }); }); }; SensibleSDK.prototype.classify = function (params) { return __awaiter(this, void 0, void 0, function () { - var url, response, e_3; - return __generator(this, function (_a) { - switch (_a.label) { + var url, file, _a, response, e_3; + return __generator(this, function (_b) { + switch (_b.label) { case 0: validateClassificationParams(params); url = "".concat(baseUrl, "/classify/async"); - _a.label = 1; - case 1: - _a.trys.push([1, 3, , 4]); + if (!("file" in params)) return [3 /*break*/, 1]; + _a = params.file; + return [3 /*break*/, 3]; + case 1: return [4 /*yield*/, fs_1.promises.readFile(params.path)]; + case 2: + _a = _b.sent(); + _b.label = 3; + case 3: + file = _a; + _b.label = 4; + case 4: + _b.trys.push([4, 6, , 7]); return [4 /*yield*/, got_1.default .post(url, { - body: params.file, + body: file, headers: { authorization: "Bearer ".concat(this.apiKey), "content-type": "application/pdf", // HACK }, }) .json()]; - case 2: - response = _a.sent(); - return [3 /*break*/, 4]; - case 3: - e_3 = _a.sent(); + case 5: + response = _b.sent(); + return [3 /*break*/, 7]; + case 6: + e_3 = _b.sent(); throwError(e_3); - return [3 /*break*/, 4]; - case 4: + return [3 /*break*/, 7]; + case 7: if (!isClassificationResponse(response)) { throw "Got invalid response from extract_from_url: ".concat(JSON.stringify(response)); } @@ -243,10 +262,12 @@ function validateExtractParams(params) { if (!params || typeof params != "object") throw "Invalid extraction parameters: not an object"; if (!(("file" in params && params.file instanceof Buffer) || - ("url" in params && typeof params.url === "string"))) - throw "Invalid extraction parameters: must include file or url"; - if ("file" in params && 
"url" in params) - throw "Invalid extraction parameters: ony one of file or url should be set"; + ("url" in params && typeof params.url === "string") || + ("path" in params && typeof params.path === "string"))) + throw "Invalid extraction parameters: must include file, url or path"; + if (["file" in params, "url" in params, "path" in params].filter(function (x) { return x; }) + .length !== 1) + throw "Invalid extraction parameters: only one of file, url or path should be set"; if (!(("documentType" in params && typeof params.documentType === "string") || ("documentTypes" in params && Array.isArray(params.documentTypes) && @@ -262,8 +283,8 @@ function validateExtractParams(params) { function validateClassificationParams(params) { if (!(params && typeof params === "object" && - "file" in params && - params.file instanceof Buffer)) + (("file" in params && params.file instanceof Buffer) || + ("path" in params && typeof params.path === "string")))) throw "Invalid classification params"; } var sleep = (0, util_1.promisify)(setTimeout); diff --git a/package-lock.json b/package-lock.json index 5b1f3f3..92ce03d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,13 @@ { - "name": "sensible-sdk", + "name": "sensible-api", "version": "0.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "sensible-sdk", + "name": "sensible-api", "version": "0.0.1", + "license": "MIT", "dependencies": { "got": "^11.8.5" }, diff --git a/package.json b/package.json index dad37c4..af435a3 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,10 @@ { - "name": "sensible-sdk", - "version": "0.0.1", - "description": "Sensible SDK", + "name": "sensible-api", + "version": "0.0.10", + "description": "Javascript SDK for Sensible, the developer-first platform for extracting structured data from documents so that you can build document-automation features into your SaaS products", + "keywords": 
["IDP","parsing","conversion","openai","processing","csv","excel","convert","json","LLMs","pdf","png","tiff","jpeg","doc","docx","document","text","data","extraction","extract","classification","classify","sensible","openapi","gpt-3","gpt-4","senseml","automation","sdk","query","document-processing"," intelligent-document-processing"," pdf-conversion"," pdf-extraction"," document-extraction"," pdf-crawler","pdf-parser","pdf-extract","pdf-to-data","document-data-extraction","document-automation","sensible-api"], + "license": "MIT", + "homepage": "https://github.com/sensible-hq/sensible-api-js", "main": "dist/index.js", "types": "dist/index.d.ts", "files": [ @@ -10,7 +13,7 @@ "scripts": { "build": "tsc" }, - "author": "Sensible", + "author": "Sensible Technologies, Inc", "devDependencies": { "@types/node": "^20.6.3" }, diff --git a/src/index.ts b/src/index.ts index 34fff0c..5cfba7d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,8 @@ import got, { HTTPError } from "got"; import * as querystring from "node:querystring"; +import { promises as fs } from "fs"; import { promisify } from "util"; -import { ClassificationResult, ExtractionResult, Webhook } from "./types"; +import type { ClassificationResult, ExtractionResult, Webhook } from "./types"; const baseUrl = "https://api.sensible.so/v0"; @@ -27,7 +28,7 @@ export class SensibleSDK { : "") + `?${querystring.stringify({ ...(environment ? { environment } : {}), - ...(documentName ? { documentName } : {}), + ...(documentName ? { document_name: documentName } : {}), })}`; const body = { @@ -65,10 +66,13 @@ export class SensibleSDK { const { id, upload_url } = response; + const file = + "file" in params ? 
params.file : await fs.readFile(params.path); + try { const putResponse = await got.put(upload_url, { method: "PUT", - body: params.file, + body: file, }); } catch (e: any) { throw `Error ${e.response.statusCode} uploading file to S3: ${e.response.body}`; @@ -83,11 +87,14 @@ export class SensibleSDK { const url = `${baseUrl}/classify/async`; + const file = + "file" in params ? params.file : await fs.readFile(params.path); + let response; try { response = await got .post(url, { - body: params.file, + body: file, headers: { authorization: `Bearer ${this.apiKey}`, "content-type": "application/pdf", // HACK @@ -122,7 +129,7 @@ export class SensibleSDK { response && typeof response === "object" && "status" in response && - response.status !== "WAITING" + (response.status == "COMPLETE" || response.status == "FAILED") ) { return response as ExtractionResult; } @@ -166,7 +173,7 @@ export class SensibleSDK { } } -type FileDefinition = { file: Buffer } | { url: string }; +type FileDefinition = { file: Buffer } | { url: string } | { path: string }; type DocumentType = | { documentType: string; configurationName?: string } | { documentTypes: string[] }; @@ -185,12 +192,16 @@ function validateExtractParams(params: unknown) { if ( !( ("file" in params && params.file instanceof Buffer) || - ("url" in params && typeof params.url === "string") + ("url" in params && typeof params.url === "string") || + ("path" in params && typeof params.path === "string") ) ) - throw "Invalid extraction parameters: must include file or url"; - if ("file" in params && "url" in params) - throw "Invalid extraction parameters: ony one of file or url should be set"; + throw "Invalid extraction parameters: must include file, url or path"; + if ( + ["file" in params, "url" in params, "path" in params].filter((x) => x) + .length !== 1 + ) + throw "Invalid extraction parameters: only one of file, url or path should be set"; if ( !( ("documentType" in params && typeof params.documentType === "string") || @@ 
-210,15 +221,15 @@ function validateExtractParams(params: unknown) { throw "Invalid extraction parameters: environment should be a string"; } -type ClassificationParams = { file: Buffer }; +type ClassificationParams = { file: Buffer } | { path: string }; function validateClassificationParams(params: unknown) { if ( !( params && typeof params === "object" && - "file" in params && - params.file instanceof Buffer + (("file" in params && params.file instanceof Buffer) || + ("path" in params && typeof params.path === "string")) ) ) throw "Invalid classification params"; @@ -300,3 +311,5 @@ function isClassificationResponse( typeof response.download_link === "string" ); } + +export type { ClassificationResult, ExtractionResult, Webhook };