AI-102 Implement knowledge mining and information extraction solutions • Complete Question Bank
Complete AI-102 Implement knowledge mining and information extraction solutions question bank — all questions with answers and detailed explanations.
Refer to the exhibit.
{
"name": "my-skillset",
"description": "Custom skillset",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"context": "/document",
"textSplitMode": "pages",
"maximumPageLength": 5000,
"defaultLanguageCode": "en",
"inputs": [
{ "name": "text", "source": "/document/content" }
],
"outputs": [
{ "name": "textItems", "targetName": "pages" }
]
},
{
"@odata.type": "#Microsoft.Skills.Text.V3.SentimentSkill",
"context": "/document/pages/*",
"defaultLanguageCode": "en",
"inputs": [
{ "name": "text", "source": "/document/pages/*" }
],
"outputs": [
{ "name": "sentiment", "targetName": "sentiment" }
]
}
]
}
Refer to the exhibit.
$index = @{
name = "knowledge-index"
fields = @(
@{name = "id"; type = "Edm.String"; key = $true},
@{name = "content"; type = "Edm.String"; searchable = $true},
@{name = "people"; type = "Collection(Edm.String)"; searchable = $true; filterable = $true},
@{name = "organizations"; type = "Collection(Edm.String)"; searchable = $true; filterable = $true}
)
suggesters = @(
@{name = "sg"; searchMode = "analyzingInfixMatching"; sourceFields = @("content")}
)
}
Refer to the exhibit.
{
"dataSource": {
"name": "blob-datasource",
"type": "azureblob",
"credentials": {
"connectionString": "DefaultEndpointsProtocol=https;AccountName=myaccount;AccountKey=...;EndpointSuffix=core.windows.net"
},
"container": {
"name": "documents"
}
},
"index": {
"name": "docs-index",
"fields": [
{"name":"id","type":"Edm.String","key":true,"searchable":false},
{"name":"content","type":"Edm.String","searchable":true},
{"name":"metadata_storage_name","type":"Edm.String","searchable":true}
]
},
"indexer": {
"name": "docs-indexer",
"dataSourceName": "blob-datasource",
"targetIndexName": "docs-index",
"parameters": {
"batchSize": 10,
"maxFailedItems": -1
}
}
}
Refer to the exhibit.
{
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"name": "#1",
"context": "/document",
"inputs": [
{"name": "text", "source": "/document/content"},
{"name": "textSplitMode", "source": "pages"},
{"name": "maximumPageLength", "source": 5000}
],
"outputs": [
{"name": "textItems", "targetName": "pages"}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.V3.SentimentSkill",
"name": "#2",
"context": "/document/pages/*",
"inputs": [
{"name": "text", "source": "/document/pages/*"}
],
"outputs": [
{"name": "sentiment", "targetName": "sentimentLabel"},
{"name": "confidenceScores", "targetName": "confidenceScores"}
]
}
]
}
Refer to the exhibit.
{
"dataSourceName": "myblob",
"skillsetName": "mypdfskillset",
"targetIndexName": "myindex",
"parameters": {
"configuration": {
"dataToExtract": "contentAndMetadata",
"parsingMode": "json"
}
},
"fieldMappings": [
{"sourceFieldName": "metadata_storage_path", "targetFieldName": "path"},
{"sourceFieldName": "content", "targetFieldName": "content"}
]
}
Refer to the exhibit.
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"type": "Microsoft.Search/searchServices/indexers",
"apiVersion": "2023-11-01",
"name": "demo-indexer",
"dependsOn": [
"[resourceId('Microsoft.Search/searchServices', 'demo-search')]"
],
"properties": {
"dataSourceName": "demo-datasource",
"targetIndexName": "demo-index",
"skillsetName": "demo-skillset",
"schedule": {
"interval": "PT1H",
"startTime": "2024-01-01T00:00:00Z"
},
"parameters": {
"maxFailedItems": 10,
"maxFailedItemsPerBatch": 5
},
"fieldMappings": [
{
"sourceFieldName": "metadata_storage_path",
"targetFieldName": "path"
}
]
}
}
]
}
Refer to the exhibit.
{
"skillset": {
"name": "demo-skillset",
"description": "Custom skillset for enrichment",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Custom.WebApiSkill",
"name": "custom-skill",
"description": "Calls external API for entity extraction",
"uri": "https://myfunctionapp.azurewebsites.net/api/extract",
"context": "/document",
"inputs": [
{
"name": "text",
"source": "/document/content"
}
],
"outputs": [
{
"name": "entities",
"targetName": "extractedEntities"
}
],
"httpMethod": "POST",
"timeout": "PT30S",
"batchSize": 5,
"degreeOfParallelism": 3
}
]
}
}
Refer to the exhibit.
```json
{
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"name": "split-skill",
"context": "/document",
"inputs": [
{
"name": "text",
"source": "/document/content"
}
],
"outputs": [
{
"name": "pages",
"targetName": "pages"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.V3.SentimentSkill",
"name": "sentiment-skill",
"context": "/document/pages/*",
"inputs": [
{
"name": "text",
"source": "/document/pages/*"
}
],
"outputs": [
{
"name": "sentiment",
"targetName": "sentiment"
}
]
}
]
}
```
Refer to the exhibit.
```json
{
"indexers": [
{
"name": "my-indexer",
"dataSourceName": "my-datasource",
"targetIndexName": "my-index",
"skillsetName": "my-skillset",
"schedule": {
"interval": "PT1H"
},
"parameters": {
"batchSize": 100,
"maxFailedItems": 10,
"maxFailedItemsPerBatch": 5,
"configuration": {
"dataToExtract": "contentAndMetadata",
"imageAction": "generateNormalizedImages"
}
},
"fieldMappings": [
{
"sourceFieldName": "metadata_storage_path",
"targetFieldName": "path"
}
],
"outputFieldMappings": [
{
"sourceFieldName": "/document/content",
"targetFieldName": "content"
}
]
}
]
}
```
Refer to the exhibit.
```json
{
"dataSource": {
"name": "my-blob-datasource",
"type": "azureblob",
"credentials": {
"connectionString": "DefaultEndpointsProtocol=https;AccountName=mystorage;AccountKey=...;EndpointSuffix=core.windows.net"
},
"container": {
"name": "documents",
"query": ""
}
}
}
```
Refer to the exhibit.
{
"name": "my-skillset",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.V3.EntityRecognitionSkill",
"categories": [ "Organization", "Person", "Location" ],
"defaultLanguageCode": "en",
"inputs": [ { "name": "text", "source": "/document/content" } ],
"outputs": [ { "name": "entities", "targetName": "entities" } ]
},
{
"@odata.type": "#Microsoft.Skills.Text.KeyPhraseExtractionSkill",
"defaultLanguageCode": "en",
"inputs": [ { "name": "text", "source": "/document/content" } ],
"outputs": [ { "name": "keyPhrases", "targetName": "keyPhrases" } ]
}
]
}
Refer to the exhibit.
{
"$schema": "https://schemas.microsoft.com/azure/cosmosdb/2019-08-01",
"datasources": [
{
"name": "my-cosmosdb",
"type": "cosmosdb",
"credentials": { "connectionString": "AccountEndpoint=https://mycosmos.documents.azure.com:443/;AccountKey=...;" },
"container": { "name": "mycontainer", "query": "SELECT c.id, c.title, c.content FROM c WHERE c._ts > @HighWaterMark ORDER BY c._ts" }
}
]
}
Refer to the exhibit.
{
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"name": "split",
"context": "/document",
"textSplitMode": "pages",
"maximumPageLength": 5000,
"pageOverlapLength": 500,
"inputs": [
{ "name": "text", "source": "/document/content" }
],
"outputs": [
{ "name": "textItems", "targetName": "pages" }
]
},
{
"@odata.type": "#Microsoft.Skills.Text.EntityRecognitionSkill",
"name": "entities",
"context": "/document/pages/*",
"categories": [ "Person", "Organization" ],
"inputs": [
{ "name": "text", "source": "/document/pages/*" }
],
"outputs": [
{ "name": "entities", "targetName": "entities" }
]
}
]
}
Refer to the exhibit.
{
"value": [
{
"name": "myindexer",
"status": "running",
"lastResult": {
"status": "success",
"errorCount": 0,
"warningCount": 5
}
}
]
}
Refer to the exhibit.
{
"@odata.context": "https://mysearch.search.windows.net/$metadata#indexers/$entity",
"name": "blob-indexer",
"dataSourceName": "blob-datasource",
"targetIndexName": "myindex",
"skillsetName": "my-skillset",
"schedule": {
"interval": "PT1H",
"startTime": "2025-01-01T00:00:00Z"
},
"parameters": {
"batchSize": 10,
"maxFailedItems": 5,
"maxFailedItemsPerBatch": 2
},
"fieldMappings": [],
"outputFieldMappings": [],
"status": "running"
}
{
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"name": "#1",
"context": "/document",
"inputs": [
{
"name": "text",
"source": "/document/content"
},
{
"name": "languageCode",
"source": "/document/language"
}
],
"outputs": [
{
"name": "textItems",
"targetName": "pages"
}
],
"textSplitMode": "pages",
"maximumPageLength": 5000
},
{
"@odata.type": "#Microsoft.Skills.Text.V3.EntityRecognitionSkill",
"name": "#2",
"context": "/document/pages/*",
"inputs": [
{
"name": "text",
"source": "/document/pages/*"
},
{
"name": "languageCode",
"source": "/document/language"
}
],
"outputs": [
{
"name": "entities",
"targetName": "entities"
}
]
}
]
}
{
"indexer": {
"dataSourceName": "blob-datasource",
"targetIndexName": "knowledge-index",
"schedule": {
"interval": "PT1H"
},
"parameters": {
"batchSize": 10,
"maxFailedItems": 5,
"maxFailedItemsPerBatch": 5
}
}
}
{
"searchResults": {
"value": [
{
"@search.score": 2.345,
"content": "The quick brown fox jumps over the lazy dog.",
"metadata_storage_path": "https://storage.blob.core.windows.net/documents/doc1.pdf"
},
{
"@search.score": 1.234,
"content": "A fast brown fox leaps over a sleepy dog.",
"metadata_storage_path": "https://storage.blob.core.windows.net/documents/doc2.pdf"
}
]
}
}
{
"skillset": {
"cognitiveServices": "/subscriptions/.../providers/Microsoft.CognitiveServices/accounts/my-cog-svc",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.V3.EntityRecognitionSkill",
"categories": ["Person", "Organization", "Location"]
},
{
"@odata.type": "#Microsoft.Skills.Text.V3.KeyPhraseExtractionSkill"
}
]
}
}
{
"skillset": {
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"name": "split",
"context": "/document",
"textSplitMode": "pages",
"maximumPageLength": 5000,
"pageOverlapLength": 500,
"defaultLanguageCode": "en",
"inputs": [
{ "name": "text", "source": "/document/content" }
],
"outputs": [
{ "name": "textItems", "targetName": "pages" }
]
},
{
"@odata.type": "#Microsoft.Skills.Text.V3.SentimentSkill",
"name": "sentiment",
"context": "/document/pages/*",
"defaultLanguageCode": "en",
"inputs": [
{ "name": "text", "source": "/document/pages/*" }
],
"outputs": [
{ "name": "sentiment", "targetName": "sentimentLabel" },
{ "name": "confidenceScores", "targetName": "confidenceScores" }
]
}
]
}
}
You are developing a knowledge mining solution for a legal firm that needs to process thousands of legal contracts stored as PDFs in Azure Blob Storage. The solution must extract clauses, parties, and dates using a custom model. You are using Microsoft Foundry with Azure AI Search and Azure AI Document Intelligence. The custom model must be trained on labeled contract data. After training, you deploy the model and integrate it into the AI Search enrichment pipeline. The pipeline must also perform OCR for scanned contracts. You have configured the following:
- A custom classification model in Document Intelligence for document types.
- A custom extraction model in Document Intelligence for clauses, parties, and dates.
- An Azure AI Search index with fields: clause, party, date.
- A skillset with a Document Intelligence skill pointing to the custom extraction model.
During testing, the pipeline runs successfully for digital PDFs but fails for scanned PDFs. The error indicates that OCR is not being applied. What should you do to fix the issue?